Python SohuItem примеры использования

Язык программирования: Python

Пространство имен/Пакет: sohu.items

Класс/Тип: SohuItem

Примеров на hotexamples.com: 7

Python SohuItem — найдено 7 примеров. Это лучшие примеры кода на Python для sohu.items.SohuItem, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

SohuItem(7)

Основные методы

SohuItem (7)

Пример #1

Показать файл

    def parse(self, response):
        """Crawl every link on the page and emit items for article-like URLs.

        For each ``<a href>`` found in ``response`` a follow-up
        ``scrapy.Request`` (handled by this same method) is yielded.  When the
        resolved link additionally matches the article patterns below, and is
        not a comment anchor, a ``SohuItem`` carrying the link is yielded too.

        Args:
            response: The downloaded page.

        Yields:
            ``scrapy.Request`` objects for links to follow and ``SohuItem``
            objects whose ``'link'`` field holds an article URL.
        """
        # Compile once per call instead of once per link.
        article_path = re.compile(r'.*/a/.*')
        comment_anchor = re.compile(r'.*#comment_area$')
        # NOTE(review): the dots in "news.sohu.com" are unescaped and match any
        # character; the pattern is kept as it was.
        news_page = re.compile(r'.*news.sohu.com.*s?html?$')

        sel = scrapy.Selector(response)

        for link_sel in sel.xpath('//a[@href]'):
            hrefs = link_sel.re('href="(.*?)"')
            if not hrefs:
                # Nothing captured for this anchor; indexing [0] would raise.
                continue

            link = str(hrefs[0])
            if not link:
                continue

            if not link.startswith('http'):
                # Resolve relative and protocol-relative links against the
                # page URL instead of blindly prefixing "http:".
                link = response.urljoin(link)
                if not link.startswith('http'):
                    # e.g. mailto:/javascript: links -- not crawlable over
                    # HTTP, so skip them rather than hand them to
                    # scrapy.Request.
                    continue

            yield scrapy.Request(link, callback=self.parse)

            looks_like_article = (news_page.match(link)
                                  or article_path.match(link))
            if looks_like_article and not comment_anchor.match(link):
                item = SohuItem()
                item['link'] = link
                yield item

Пример #2

Показать файл

Файл: mysohu.py Проект: loveCanopy/Scrapy_Demo

 def parse_item(self, response):
     """Render the page in ``self.driver`` and scrape it into a ``SohuItem``.

     The page at ``response.url`` is opened with ``self.driver`` (presumably
     a Selenium WebDriver -- confirm against the spider's setup), the
     "info-arrT" element is clicked, and the text of one element per field
     is copied into the item.

     Args:
         response: The Scrapy response whose URL should be rendered.

     Yields:
         A single ``SohuItem`` with all fields listed in ``field_xpaths``.
     """
     # (item field, XPath of the element whose text fills it), in read order.
     field_xpaths = (
         ('name', '//div[@class="crumbs"]/a[last()]'),
         ('fav', '//div[@class="vBox vBox-ding"]//i'),
         ('step', '//div[@class="vBox vBox-cai"]//i'),
         ('playcounts', '//div[@class="vBox vBox-play vBox-play-panel"]//i'),
         ('actor', '//div[@class="info info-con"]//a[@data-pb-other="actor"]'),
         ('director',
          '//div[@class="info info-con"]//a[@data-pb-other="director"]'),
         ('introduce', '//div[@class="info info-con"]/p[@class="intro open"]'),
         ('time', '//li[@style="display: list-item;"]'),
         ('area', '//a[@data-pb-other="area"]'),
         ('type', '//a[@data-pb-other="category"]'),
         ('year', '//a[@data-pb-other="year"]'),
     )

     # Load the page in the driver so that it gets rendered.
     self.driver.get(response.url)
     # Click the button that expands the info section.
     self.driver.find_element_by_class_name("info-arrT").click()

     items = SohuItem()
     for field, xpath in field_xpaths:
         items[field] = self.driver.find_element_by_xpath(xpath).text
     yield items

Пример #3

Показать файл

 def parse(self, response):
     """Collect headline links from the focus-news box and follow each one.

     For every ``<li>`` in the first "list16" block of the "focus-news-box"
     container, a ``SohuItem`` is filled with the anchor titles and hrefs
     and passed on, via ``meta['item']``, to ``self.parse_content`` together
     with a request for the first href.

     Args:
         response: The downloaded listing page.

     Yields:
         One ``scrapy.Request`` per list entry that contains a link.
     """
     rows = response.xpath(
         '//div[@class = "focus-news-box"]/div/div[@class = "list16"][1]/ul/li')
     for sel in rows:
         item = SohuItem()
         item['title'] = sel.xpath('.//a/@title').extract()
         item['href'] = sel.xpath('.//a/@href').extract()
         if not item['href']:
             # No anchor in this entry: there is nothing to follow, and
             # indexing [0] below would raise IndexError.
             continue
         yield scrapy.Request(response.urljoin(item['href'][0]),
                              meta={'item': item},
                              callback=self.parse_content)

Пример #4

Показать файл

 def parse_url(self, response):
     """Extract the article paragraphs of a page into a ``SohuItem``.

     The text nodes of every ``<p>`` directly under
     ``<article class="article">`` are stored in the item's ``"context"``
     field and printed, and the ``self.times`` counter is incremented.

     Args:
         response: The downloaded article page.

     Yields:
         A single ``SohuItem`` holding the extracted paragraph texts.
     """
     paragraphs = response.xpath(
         '//article[@class="article"]/p/text()'
     ).extract()

     item = SohuItem()
     item["context"] = paragraphs
     print(item["context"])

     # Count one more processed page.
     self.times += 1
     yield item

Пример #5

Показать файл

 def get_tab(self, response, category):
     """Build one ``SohuItem`` per ``<li>`` of every "rList" list.

     Args:
         response: Object exposing ``xpath()``; the ``./ul`` query is
             evaluated relative to it.
         category: Value stored in each item's ``'rank_category'`` field.

     Returns:
         A list of ``SohuItem`` objects, in document order.
     """
     # (item field, XPath relative to the <li>) pairs, in assignment order.
     field_paths = (
         ('rank_name', './div[@class="vName"]/div/a/text()'),
         ('rank_type', './span[@class="vSort"]/a/text()'),
         ('rank_index', './span[@class="vTotal"]/text()'),
         ('rank_trend', './span[@class="vRank"]/@title'),
     )

     entries = (
         entry
         for ranking in response.xpath('./ul[@class="rList"]')
         for entry in ranking.xpath('./li')
     )

     collected = []
     for entry in entries:
         item = SohuItem()
         item['rank_category'] = category
         for field, path in field_paths:
             item[field] = entry.xpath(path).extract()
         collected.append(item)
     return collected

Пример #6

Показать файл

 def get_tab1(self, response, category):
     """Build a single ``SohuItem`` from the ``li`` with class "No1".

     Args:
         response: Object exposing ``xpath()``; all queries are evaluated
             relative to it.
         category: Value stored in the item's ``'rank_category'`` field.

     Returns:
         The populated ``SohuItem``; every rank field holds the list
         returned by ``extract()``.
     """
     # (item field, XPath) pairs, in assignment order.
     field_paths = (
         ('rank_name',
          './ul[@class="rList"]/li[@class="No1"]/div[@class="cfix"]/div[@class="vName"]/a/@title'),
         ('rank_type',
          './ul[@class="rList"]/li[@class="No1"]/div[@class="cfix"]/span[@class="vSort"]/a/text()'),
         ('rank_index',
          './ul[@class="rList"]/li[@class="No1"]/div[@class="cfix"]/span[@class="vTotal"]/text()'),
         ('rank_trend',
          './ul[@class="rList"]/li[@class="No1"]/div[@class="cfix"]/span[@class="vRank"]/@title'),
     )

     top_entry = SohuItem()
     top_entry['rank_category'] = category
     for field, path in field_paths:
         top_entry[field] = response.xpath(path).extract()
     return top_entry

Пример #7

Показать файл

Файл: sohu_spider.py Проект: mej013/data_scraping

 def parse(self, response):
     """Scrape each realtor card and request the realtor's phone number.

     For every ``<div class="realtor-info">`` an item is filled with the
     name, company and a normalised region string.  The first run of digits
     in the profile link is used as the realtor id for a follow-up request
     whose response is handled by ``self.parse_phone``; the partially
     filled item travels along in ``meta['item']``.

     Args:
         response: The downloaded realtor listing page.

     Yields:
         One ``scrapy.Request`` per realtor card.

     Raises:
         IndexError: If a card has fewer than two company spans, fewer than
             three region text nodes, or no digits in its profile link.
         AttributeError: If a card has no profile link.
     """
     for agent in response.xpath('//div[@class="realtor-info"]'):
         item = SohuItem()
         item['name'] = agent.xpath(
             './/a[@class="realtor-info-name"]/text()').extract_first()
         item['company'] = agent.xpath(
             './/div[@class="rcompany"]//span/text()').extract()[1]

         region_parts = agent.xpath(
             './/div[@class="rparks"]//text()').extract()
         # The first three text nodes are glued together as-is; the rest,
         # except the last node, are joined with "/".
         prefix = (str(region_parts[0]) + str(region_parts[1])
                   + str(region_parts[2]))
         region = prefix + "/".join(str(part) for part in region_parts[3:-1])
         # Strip layout whitespace and collapse the doubled separators.
         region = region.replace('\n', "").replace(" ", "").replace("//", "/")
         item['region'] = region

         profile_href = str(agent.xpath(
             './/a[@class="realtor-info-name"]/@href').extract_first().strip())
         # Raw string: "\d" in a plain literal is an invalid escape sequence.
         realtor_id = re.findall(r'[1-9]\d*', profile_href)[0]

         href = ("http://esf.focus.cn/api/getVirtualPhone?realtorId="
                 + realtor_id + "&ecoCityId=73&call_url=.html")
         yield scrapy.Request(href,
                              meta={'item': item},
                              callback=self.parse_phone)