示例#1
0
    def parse(self, response):
        """Force GBK decoding on the response, log its URL, and wrap the URL in an Item.

        Args:
            response: The response object handed to this callback; its
                ``encoding`` attribute is overwritten with ``'GBK'``.

        Returns:
            An ``Item`` constructed from the response URL.
        """
        # Switch the response's encoding to GBK.
        response.encoding = 'GBK'

        page_url = response.url
        print(page_url)
        return Item(page_url)
示例#2
0
 def parse_detail(self, response):
     """Attach the runtime query result to the carried-over data and emit it.

     Args:
         response: The response for the detail page; ``response.meta['data']``
             must hold the mapping passed along by the previous callback.

     Returns:
         An ``Item`` wrapping that mapping, now including ``'movie_length'``.
     """
     runtime_query = '//span[@property="v:runtime"]/text()'

     # Pick up the data handed over by the previous parse function.
     movie = response.meta['data']
     # NOTE(review): the raw query result is stored as-is, without selecting
     # an element from it — confirm this is what the consumer expects.
     movie['movie_length'] = response.xpath(runtime_query)

     # Hand the enriched record back as the result.
     return Item(movie)
示例#3
0
 def parse(self, response):
     """Parse the Douban Movie Top 250 list page.

     Collects the first title text of every ``<li>`` under the
     ``grid_view`` ordered list and yields them together as one ``Item``.

     Args:
         response: The list-page response to run the XPath queries against.

     Yields:
         A single ``Item`` wrapping the list of extracted titles.
     """
     # One <li> element per entry in the list.
     entries = response.xpath("//ol[@class='grid_view']/li")
     # For each entry, take the first match of its first "title" span.
     titles = [
         entry.xpath(".//span[@class='title'][1]/text()")[0]
         for entry in entries
     ]
     yield Item(titles)
示例#4
0
    def parse(self, response):
        """Yield title/link Items for the first three ``div.hd`` nodes.

        Args:
            response: The response to run the XPath queries against.

        Yields:
            One ``Item`` per node, wrapping a dict with the keys
            ``'page_title'`` and ``'page_link'``.
        """
        # Only the first three matching nodes are processed.
        for node in response.xpath("//div[@class='hd']")[:3]:
            # Item data, handed over to the pipeline.
            yield Item({
                'page_title': node.xpath("./a/span/text()")[0],
                'page_link': node.xpath("./a/@href")[0],
            })
示例#5
0
    def parse(self, response):
        """Yield an Item and a follow-up Request for the first three ``div.hd`` nodes.

        Args:
            response: The response to run the XPath queries against.

        Yields:
            For each node, first an ``Item`` wrapping a dict with the keys
            ``'page_title'`` and ``'page_link'``, then a ``Request`` for that
            link whose callback is given by the name ``"parse_page"``.
        """
        # Only the first three matching nodes are processed.
        for node in response.xpath("//div[@class='hd']")[:3]:
            entry = {
                'page_title': node.xpath("./a/span/text()")[0],
                'page_link': node.xpath("./a/@href")[0],
            }
            # Item data, handed over to the pipeline.
            yield Item(entry)
            # Request object: sent by the Engine and parsed by the named
            # callback parse_page. The link is read back from the dict here,
            # after the Item has been yielded.
            yield Request(entry['page_link'], callback="parse_page")
示例#6
0
 def parse_page(self, response):
     """Log the status code and URL of a fetched page, then yield an empty Item.

     Args:
         response: The response for the page; its ``status_code`` and ``url``
             attributes are printed.

     Yields:
         A single ``Item`` wrapping an empty dict.
     """
     message = "[parse_page] : [{}] <{}>".format(
         response.status_code, response.url)
     print(message)
     yield Item({})
示例#7
0
 def parse(self, response):
     """Yield an Item holding the first text match of the page's ``<title>``.

     Args:
         response: The response to run the XPath query against.

     Yields:
         A single ``Item`` wrapping ``{'title': <first match>}``.
     """
     page_title = response.xpath("//head/title/text()")[0]
     yield Item({'title': page_title})
示例#8
0
    def parse_detail(self, response):
        """Add the runtime query result to ``response.meta['data']`` and emit it.

        Args:
            response: The response for the detail page; ``response.meta``
                must contain a ``'data'`` mapping.

        Returns:
            An ``Item`` wrapping that mapping, now including ``'movie_length'``.
        """
        # presumably 'data' was attached by an earlier callback — verify
        # against the code that builds the request.
        record = response.meta['data']
        # NOTE(review): the whole query result is stored, not a single
        # element of it — confirm this is intended.
        runtime = response.xpath('//span[@property="v:runtime"]/text()')
        record['movie_length'] = runtime

        return Item(record)
示例#9
0
 def parse(self, response):
     """Wrap the response URL in an Item.

     Args:
         response: The response handed to this callback.

     Returns:
         An ``Item`` constructed from ``response.url``.
     """
     page_url = response.url
     return Item(page_url)