def extract_results(self, response):
    """Yield ``{'title', 'text', 'url'}`` dicts parsed from a Bing results page.

    Entries whose caption paragraph is missing produce no result.
    """
    doc = Selector(response.text)
    for entry in doc.css('li.b_algo'):
        headline = entry.css('h2>a')[0].text.strip()
        captions = entry.css('div.b_caption>p')
        summary = captions[0].text.strip() if len(captions) > 0 else None
        target = entry.css('h2>a')[0].attr('href').strip()
        if summary is not None:
            yield {'title': headline, 'text': summary, 'url': target}
def extract_results(self, response):
    """Yield ``{'title', 'text', 'url'}`` dicts parsed from a Google results page.

    Entries without a ``span.st`` snippet produce no result.
    """
    page = Selector(response.text)
    for block in page.css('div.g'):
        heading = block.css('h3')[0].text.strip()
        snippets = block.css('span.st')
        snippet = snippets[0].text.strip() if len(snippets) > 0 else None
        link = block.css('div.r>a')[0].attr('href').strip()
        if snippet is not None:
            yield {'title': heading, 'text': snippet, 'url': link}
def extract_results(self, response):
    """Yield ``{'title', 'text', 'url'}`` dicts parsed from a Baidu results page.

    Entries without a ``div.c-abstract`` summary produce no result.
    """
    page = Selector(response.text)
    for block in page.css('div.result'):
        heading = block.css('h3>a')[0].text.strip()
        abstracts = block.css('div.c-abstract')
        summary = abstracts[0].text.strip() if len(abstracts) > 0 else None
        link = block.css('h3>a')[0].attr('href').strip()
        if summary is not None:
            yield {'title': heading, 'text': summary, 'url': link}
def extract_results(self, response):
    """Yield ``{'title', 'text', 'url'}`` dicts parsed from an Ask results page.

    Entries without a ``span.algo-summary`` produce no result.
    """
    doc = Selector(response.text)
    for hit in doc.css('li.algo-result'):
        headline = hit.css('a.algo-title')[0].text.strip()
        summaries = hit.css('span.algo-summary')
        summary = summaries[0].text.strip() if len(summaries) > 0 else None
        target = hit.css('a.algo-title')[0].attr('href').strip()
        if summary is not None:
            yield {'title': headline, 'text': summary, 'url': target}
def extract_results(self, response):
    """Yield ``{'title', 'text', 'url'}`` dicts parsed from a Yahoo results page.

    Yahoo wraps the landing URL in a redirect link; the real target is pulled
    out with ``self.yahoo_url_reg`` and percent-decoded before being yielded.
    """
    doc = Selector(response.text)
    for hit in doc.css('div.algo-sr'):
        headline = hit.css('h3>a')[0].text.strip()
        paragraphs = hit.css('p.lh-l')
        summary = paragraphs[0].text.strip() if len(paragraphs) > 0 else None
        wrapped = hit.css('h3>a')[0].attr('href').strip()
        target = unquote(self.yahoo_url_reg.search(wrapped).group(1))
        if summary is not None:
            yield {'title': headline, 'text': summary, 'url': target}
def extract_results(self, response):
    """Yield ``{'title', 'text', 'url'}`` dicts parsed from a Chinaso results page.

    Items without a headline link are skipped; relative links are resolved
    against the Chinaso search base URL.  Entries without a news wrapper
    produce no result.
    """
    selector = Selector(response.text)
    for item in selector.css('li.reItem'):
        a = item.css('h2>a')
        if len(a) <= 0:
            continue
        title = a[0].text.strip()
        text = None
        div = item.css('div.reNewsWrapper')
        if len(div) > 0:
            # keep only the first line of the wrapper's text
            text = div[0].text.strip().split('\n')[0]
        # reuse the already-matched anchor instead of re-querying the item
        url = urljoin('http://www.chinaso.com/search/',
                      a[0].attr('href').strip())
        if text is not None:
            yield {'title': title, 'text': text, 'url': url}
def parse(self, response):
    """Log every quote on the page, then the next-page URL if one exists.

    Fix: the next-page lookup is now guarded, so the last page (which has
    no ``li.next`` element) no longer raises ``IndexError`` — matching the
    guarded pagination used by the CSS-based sibling parser.
    """
    selector = Selector(response.text)
    for quote in selector.xpath('//div[@class="quote"]'):
        text = quote.xpath('.//span[@itemprop="text"]')[0].text
        author = quote.xpath('.//small[@itemprop="author"]')[0].text
        author_url = quote.xpath('.//span/a/@href')[0].text
        author_url = urljoin(str(response.url), author_url)
        tags = quote.xpath('.//div[@class="tags"]/a').text
        self.log('quote: %s',
                 dict(text=text, tags=tags, author=author,
                      author_url=author_url))
    next_link = selector.xpath('//li[@class="next"]/a/@href')
    if len(next_link) > 0:
        self.log('next page url: %s', next_link[0].text)
def parse(self, response):
    """Collect every quote on the page into ``self.quotes`` and follow pagination.

    Yields an ``HttpRequest`` for the next page (with this method as its
    callback) when a ``li.next`` link is present.
    """
    doc = Selector(response.text)
    for block in doc.css('div.quote'):
        quote_text = block.css('span.text')[0].text
        who = block.css('small.author')[0].text
        about = urljoin(str(response.url), block.css('small+a')[0].attr('href'))
        tag_names = block.css('div.tags a').text
        self.quotes.append(dict(text=quote_text, tags=tag_names,
                                author=who, author_url=about))
    pager = doc.css('li.next a')
    if len(pager) > 0:
        follow_url = urljoin(str(response.url), pager[0].attr('href'))
        yield HttpRequest(follow_url, callback=self.parse)
def extract_results(self, response):
    """Yield ``{'title', 'text', 'url'}`` dicts parsed from a 360 (so.com) results page.

    The summary is taken from ``p.res-desc`` when present, falling back to
    ``div.res-rich``; the target URL may live in ``data-url`` rather than
    ``href``.  Fix: the headline anchor is matched once and reused instead
    of being re-queried for the title and again for the URL.
    """
    selector = Selector(response.text)
    for item in selector.css('li.res-list'):
        # match the anchor once; it supplies title, data-url and href
        h3_a = item.css('h3>a')[0]
        title = h3_a.text.strip()
        text = None
        res_desc = item.css('p.res-desc')
        if len(res_desc) > 0:
            text = res_desc[0].text.strip()
        else:
            res_rich = item.css('div.res-rich')
            if len(res_rich) > 0:
                text = res_rich[0].text.strip()
        url = h3_a.attr('data-url')
        if not url:
            url = h3_a.attr('href').strip()
        if text is not None:
            yield {'title': title, 'text': text, 'url': url}
def extract_results(self, response):
    """Yield ``{'title', 'text', 'url'}`` dicts parsed from a Sogou results page.

    Items without a headline link are skipped; relative links are resolved
    against the Sogou base URL.  The summary comes from ``div.ft`` when
    present, falling back to ``p.str_info``.  Fix: the headline anchor held
    in ``h`` is reused for the URL instead of re-querying the item.
    """
    selector = Selector(response.text)
    for item in selector.css('div.vrwrap,div.rb'):
        h = item.css('h3>a')
        if len(h) <= 0:
            continue
        title = h[0].text.strip()
        text = None
        div_ft = item.css('div.ft')
        if len(div_ft) > 0:
            text = div_ft[0].text.strip()
        else:
            p_str = item.css('p.str_info')
            if len(p_str) > 0:
                text = p_str[0].text.strip()
        # reuse the already-matched anchor instead of re-querying the item
        url = urljoin('https://www.sogou.com/', h[0].attr('href').strip())
        if text is not None:
            yield {'title': title, 'text': text, 'url': url}
def parse(self, response):
    """Log the hot-news link texts on the page, numbered from 1.

    Fix: replaces the ``for i in range(len(hot))`` index loop with
    ``enumerate(hot, start=1)`` — same output, idiomatic iteration.
    """
    selector = Selector(response.text)
    hot = selector.css("div.hotnews a").text
    self.log("Hot News:")
    for rank, headline in enumerate(hot, start=1):
        self.log("%s: %s", rank, headline)
async def parse(self, response):
    """Log the tag texts from the tags box, then re-request the front page.

    Yields an ``HttpRequest`` for the site root with this coroutine as its
    own callback.
    """
    doc = Selector(response.text)
    tag_names = doc.xpath("//div[contains(@class, 'tags-box')]//a").text
    self.log("Top ten tags: %s", tag_names)
    yield HttpRequest("http://quotes.toscrape.com/", callback=self.parse)
<a href="/author/J-K-Rowling">(about)</a> </span> <div class="tags"> Tags: <a class="tag" href="/tag/abilities/page/1/">abilities</a> <a class="tag" href="/tag/choices/page/1/">choices</a> </div> </div> </body> </html> ''' if __name__ == '__main__': from xpaw import Selector selector = Selector(text) print('# CSS Selector, content of quotes:') for quote in selector.css('div.quote'): print(quote.css('span.text')[0].text) print('# XPath, content of quotes:') for quote in selector.xpath('//div[@class="quote"]'): print(quote.xpath('.//span[@class="text"]')[0].text) print('# CSS Selector, content of quotes, with HTML tags:') for quote in selector.css('div.quote'): print(quote.css('span.text')[0].string) print('# CSS Selector, quote tags') for quote in selector.css('div.quote'):
def parse_results(engine, resp):
    """Parse search results for *engine* out of an HTTP response.

    Args:
        engine: one of ``'Google'``, ``'Yahoo'``, ``'Ask'``, ``'Bing'``;
            any other value yields an empty list.
        resp: response object exposing ``.text`` and ``.url``.

    Returns:
        A list of ``{'title', 'text', 'url'}`` dicts (possibly empty).

    Raises:
        ServiceBanError: when Google has redirected to its "sorry" page.

    Fix: the four copy-pasted per-engine extraction loops are collapsed
    into one ``_collect`` helper; behavior is unchanged.
    """
    selector = Selector(resp.text)
    if engine == 'Google':
        if 'google.com.hk/sorry/' in resp.url:
            raise ServiceBanError
        topstuff = selector.css('#topstuff').text
        # "no results found" banner: nothing to collect
        if len(topstuff) > 0 and '未找到符合' in topstuff[0]:
            return []
        return _collect(selector, 'div.g', 'h3', 'span.st', 'div.r>a')
    if engine == 'Yahoo':
        # Yahoo wraps the target in a redirect URL; unwrap and percent-decode it
        return _collect(selector, 'div.algo-sr', 'h3>a', 'p.lh-l', 'h3>a',
                        lambda u: unquote(yahoo_url_reg.search(u).group(1)))
    if engine == 'Ask':
        return _collect(selector, 'li.algo-result', 'a.algo-title',
                        'span.algo-summary', 'a.algo-title')
    if engine == 'Bing':
        return _collect(selector, 'li.b_algo', 'h2>a', 'div.b_caption>p', 'h2>a')
    return []


def _collect(selector, item_css, title_css, text_css, url_css, url_transform=None):
    """Best-effort extraction of result dicts from a parsed results page.

    For each element matching *item_css*, builds a dict with the stripped
    title text, the stripped summary text (item skipped when absent) and the
    anchor's href (optionally mapped through *url_transform*).  Malformed
    items are silently skipped, matching the original per-item behavior.
    """
    res = []
    for item in selector.css(item_css):
        try:
            title = item.css(title_css)[0].text.strip()
            text = None
            spans = item.css(text_css)
            if len(spans) > 0:
                text = spans[0].text.strip()
            url = item.css(url_css)[0].attr('href').strip()
            if url_transform is not None:
                url = url_transform(url)
            if text is not None:
                res.append({'title': title, 'text': text, 'url': url})
        except Exception:
            # deliberate best-effort: one bad item must not kill the page
            pass
    return res
def parse(self, response):
    """Log every quote on the page as ``author: text``."""
    doc = Selector(response.text)
    for block in doc.css('div.quote'):
        body = block.css('span.text')[0].text
        who = block.css('small.author')[0].text
        self.log(who + ": " + body)