def parse(self, response):
    """Collect joke texts and their detail-page links from the listing page.

    Yields a single FirstItem whose fields hold every match found on
    the page (parallel lists of texts and hrefs).
    """
    joke_item = FirstItem()
    # Each joke's text lives in a <span> under div.content.
    texts = response.xpath('//div[@class="content"]/span/text()')
    # Each joke's detail page is linked via a.contentHerf.
    links = response.xpath('//a[@class="contentHerf"]/@href')
    joke_item["content"] = texts.extract()
    joke_item["link"] = links.extract()
    yield joke_item
def parse(self, response):
    """Scrape joke bodies and detail links into one item.

    The item carries two parallel lists: all joke texts on the page and
    the matching detail-page hrefs.
    """
    result = FirstItem()
    result["content"] = (
        response.xpath("//div[@class='content']/span/text()").extract()
    )
    result["link"] = (
        response.xpath("//a[@class='contentHerf']/@href").extract()
    )
    yield result
def parse_list(self, response):
    """Extract the inlined product JSON from the listing page and yield items.

    The page embeds its product data as a JSON blob inside the
    ``:products`` attribute of a <product-frontend-list> tag; slice it out
    of the raw body, clean it with ``validateStr``, and emit one FirstItem
    per product entry.
    """
    json_str = ''
    try:
        # The JSON payload sits between these two fixed markers in the HTML.
        # NOTE(review): under Python 3 / Scrapy 2, response.body is bytes and
        # would need decoding before split — confirm target runtime.
        json_str = response.body.split(
            '<product-frontend-list :products="')[1].split(
            '" :per-page="36"></product-frontend-list>')[0]
    except Exception as err:
        # Markers missing (layout change, empty page): log and fall through.
        print(err)  # fixed: was a Python 2 print statement
    if json_str != '':
        json_str = self.validateStr(json_str)
        element_list = json.loads(json_str)
        for element in element_list['data']:
            item = FirstItem()
            item['sku'] = element['sku']
            # Category is inferred from the SKU text; stays '' if no match.
            item['category'] = ''
            if 'Fabric' in element['sku']:
                item['category'] = 'Fabric'
            if 'Wallpaper' in element['sku']:
                item['category'] = 'Wallpaper'
            item['name'] = element['name']
            item['price'] = element['our_price']['decimal']
            yield item
def parse(self, response):
    """Spider body: pull recommended-content titles and hrefs into one item."""
    entry = FirstItem()
    # Titles of the recommended entries.
    entry["content"] = (
        response.xpath("//a[@class='recmd-content']/text()").extract()
    )
    # Hrefs of the same anchors, in matching order.
    entry["link"] = (
        response.xpath("//a[@class='recmd-content']/@href").extract()
    )
    yield entry
def parse(self, response):
    """Parse one page of the judge-status table, then walk to the next page.

    Every even/odd table row becomes one submission record; after the
    rows are emitted, the next page is requested until the offset cap.
    """
    # (field name, relative xpath) in the exact column order of the table.
    columns = (
        ('runid', "./td[1]/text()"),
        ('username', "./td[2]/a/text()"),
        ('problem_id', "./td[3]/div/a/text()"),
        ('result', "./td[4]/a/text()"),
        ('memory', "./td[5]/div/text()"),
        ('timer', "./td[6]/div/text()"),
        ('lang', "./td[7]/text()"),
        ('codel', "./td[8]/text()"),
        ('sub_time', "./td[9]/text()"),
    )
    for row in response.xpath(
            "//tr[@class='evenrow'] | //tr[@class='oddrow']"):
        record = FirstItem()
        for field, path in columns:
            record[field] = row.xpath(path).extract()[0]
        yield record
    if self.offset < 1809510:
        self.offset += 99
        yield scrapy.Request(self.url + str(self.offset), callback=self.parse)
def parse(self, response):
    """Return a single hard-coded item wrapped in a list (smoke-test spider)."""
    fixed = FirstItem()
    fixed["name"] = "qzy"
    fixed["money"] = "12345"
    return [fixed]
def parse(self, response):
    """Build an item from the JSON response body.

    When the payload reports zero pages the item is returned with no
    fields set (matching the original behaviour); otherwise it carries
    the 6-character code taken from the tail of the URL plus the
    payload's ``data`` section.
    """
    item = FirstItem()
    payload = json.loads(response.text)  # parse once instead of twice
    if payload['pages'] != 0:
        # The last 6 characters of the URL identify this dataset.
        item['dm'] = response.url[-6:]
        item['data'] = payload['data']
    return item
def parse(self, response):
    """Grab the page <title> text into an item.

    Standard extraction pattern:
        content = response.xpath("...").extract()
    """
    title_item = FirstItem()
    title_item['content'] = (
        response.xpath("/html/head/title/text()").extract()
    )
    yield title_item
def parse(self, response):
    """Emit one item per job row on the Tencent HR listing page."""
    print("##############")
    for row in response.xpath("//tr[@class='even']|//tr[@class='odd']"):
        job = FirstItem()
        job['positionName'] = row.xpath('./td[1]/a/text()').extract()[0]
        # Hrefs in the table are site-relative; prefix the host.
        href = row.xpath('./td[1]/a/@href').extract()[0]
        job['positionLink'] = "https://hr.tencent.com/" + href
        job['positionType'] = row.xpath('./td[2]/text()').extract()[0]
        # Handed on to the pipelines' process_item.
        yield job
def parse(self, response):
    """Scrape Tieba thread listings: title, author, and reply count per post."""
    for post in response.xpath('//li[@class=" j_thread_list clearfix"]'):
        thread = FirstItem()
        # Thread title anchor text.
        thread['title'] = post.xpath(
            './/div[contains(@class,"threadlist_title pull_left j_th_tit ")]/a/text()'
        ).extract()
        # Original poster's display name.
        thread['author'] = post.xpath(
            './/div[contains(@class,"threadlist_author pull_right")]//span[contains(@class,"frs-author-name-wrap")]/a/text()'
        ).extract()
        # Reply counter shown in the left column.
        thread['reply'] = post.xpath(
            './/div[contains(@class,"col2_left j_threadlist_li_left")]/span/text()'
        ).extract()
        yield thread
def parse(self, response):
    """Yield one item per Zhihu answer, numbered from 1 in page order.

    Uses enumerate instead of the original hand-rolled counter; the
    numbering and field contents are unchanged.
    """
    for index, node in enumerate(
            response.xpath('//li[@class="List-item"]'), start=1):
        item = FirstItem()
        item['answer'] = index  # 1-based position of the answer on the page
        item['title'] = node.xpath(
            './/div[contains(@class,"ContentItem AnswerItem")]//h2/text()'
        ).extract()
        item['word'] = node.xpath(
            './/div[contains(@class,"ContentItem AnswerItem")]//p/text()'
        ).extract()
        yield item
def parse(self, response):
    """Yield an (author, content) item for every article on the page.

    The content spans are joined into one string; the author field keeps
    the raw extracted list.
    """
    articles = response.xpath(
        "//div[@id='content']//div[contains(@class, 'article')]")
    for article in articles:
        post = FirstItem()
        post['author'] = article.xpath(
            "./div[contains(@class, 'author')]//h2/text()").extract()
        # Flatten the text fragments of the body into a single string.
        fragments = article.xpath(
            "./a[contains(@class, 'contentHerf')]/div/span//text()").extract()
        post['content'] = ''.join(fragments)
        yield post
def parse(self, response):
    """Emit job rows from the Tencent HR listing, then page forward.

    After yielding every row on the current page, schedules the next
    page (offset stepped by 10) until the offset limit is reached.
    """
    for row in response.xpath("//tr[@class='even']|//tr[@class='odd']"):
        job = FirstItem()
        job['positionName'] = row.xpath('./td[1]/a/text()').extract()[0]
        # The href is site-relative; prefix the host.
        job['positionLink'] = ("https://hr.tencent.com/"
                               + row.xpath('./td[1]/a/@href').extract()[0])
        job['positionType'] = row.xpath('./td[2]/text()').extract()[0]
        # Delivered to the pipelines' process_item.
        yield job
    # Guard clause: stop paging once the offset limit is hit.
    if self.offset >= 540:
        return
    self.offset += 10
    yield scrapy.Request(self.url + str(self.offset) + "#a",
                         callback=self.parse)
def parse(self, response):
    """Scrape proxy entries from the xici #ip_list table.

    Returns a list of FirstItem records, one per data row (the header
    row is skipped).
    """
    ip_table = response.xpath('//table[@id="ip_list"]')
    rows = ip_table[0].xpath('tr')
    items = []
    for row in rows[1:]:  # rows[0] is the table header
        pre_item = FirstItem()
        pre_item['IP'] = row.xpath('td[2]/text()')[0].extract()
        pre_item['PORT'] = row.xpath('td[3]/text()')[0].extract()
        # string(td[4]) flattens nested markup into plain text.
        pre_item['POSITION'] = row.xpath('string(td[4])')[0].extract().strip()
        pre_item['TYPE'] = row.xpath('td[6]/text()').extract()
        # Pull the numeric part out of the bar's title attribute;
        # presumably a decimal like "0.123" — confirm against live markup.
        pre_item['SPEED'] = row.xpath('td[7]/div[@class="bar"]/@title').re(
            r'\d{0,2}\.\d{0,}')[0]  # raw string avoids invalid-escape warning
        pre_item['LAST_CHECK_TIME'] = row.xpath('td[9]/text()')[0].extract()
        print(pre_item)  # fixed: was a Python 2 print statement
        items.append(pre_item)
    return items
def parse(self, response):
    """Collect CSDN article titles and links into a single item."""
    post = FirstItem()
    # All headline anchors share the tracking class on their <h2>.
    headline = '//h2[@class="csdn-tracking-statistics"]/a'
    post['title'] = response.xpath(headline + '/text()').extract()
    post['link'] = response.xpath(headline + '/@href').extract()
    yield post
def parse(self, response):
    """Extract the page title text into a single item."""
    page = FirstItem()
    page['content'] = response.xpath('/html/head/title/text()').extract()
    yield page
def parse(self, response):
    """Pull the document title into an item.

    The XPath string carries a leading space; XPath tolerates leading
    whitespace, so it is kept verbatim.
    """
    title = FirstItem()
    title["content"] = response.xpath(" /html/head/title/text()").extract()
    yield title
def parse(self, response):
    """Extract and echo the page title, then yield it as an item."""
    page = FirstItem()
    page['title'] = response.xpath('/html/head/title/text()').extract()
    # Echo for quick visual confirmation while crawling.
    print(page['title'])
    yield page