Exemplo n.º 1
0
 def parse(self, response):
     """Parse a listing page and request the detail page of each movie on it.

     Every ``<dd>`` element is treated as one movie entry. Its name, release
     time and score (integer part concatenated with the fraction part) are
     stored on a fresh ``MoviesItem`` that is handed to ``self.parse2``
     through the request ``meta`` under the ``'item'`` key.

     Text fields that are missing from an entry are stored as empty strings
     and entries without a link are skipped, so a single incomplete ``<dd>``
     does not abort parsing of the whole page.

     Yields:
         scrapy.Request: one request per entry that carries a link.
     """
     print(response.url)
     for tag in Selector(response=response).xpath('//dd'):
         href = tag.css('a::attr(href)').extract_first()
         if not href:
             # Without a link there is no detail page to request.
             continue

         # default='' keeps .strip() safe when a selector matches nothing
         # (extract_first() would otherwise return None).
         name = tag.css('p.name a::text').extract_first(default='').strip()
         releasetime = tag.css('.releasetime::text').extract_first(default='').strip()
         score = (
             tag.css('i.integer::text').extract_first(default='').strip()
             + tag.css('i.fraction::text').extract_first(default='').strip()
         )

         item = MoviesItem()
         item['name'] = name
         item['releasetime'] = releasetime
         item['score'] = score
         yield scrapy.Request(
             url='http://maoyan.com' + href,
             meta={'item': item},
             callback=self.parse2,
         )
Exemplo n.º 2
0
    def parse(self, response, **kwargs):
        """Yield one item for each of the first ten movie cards on the page.

        A movie card is a ``<div class="movie-hover-info">``. For each card
        the name is read from the ``<span>`` inside the first child ``<div>``,
        and the type and date are the last text node of the second and fourth
        child ``<div>`` respectively, with surrounding whitespace stripped.

        Args:
            response: the downloaded page.
            **kwargs: accepted but not used by this method.

        Yields:
            MoviesItem: with ``file_name``, ``file_types`` and ``file_date``.

        Raises:
            IndexError: if a card has no text node under its second or
                fourth child ``<div>``.
        """
        content = Selector(
            response=response).xpath('//div[@class="movie-hover-info"]')
        for movie in content[:10]:
            # Build a fresh item per card. Re-using one instance across
            # iterations makes every yielded item alias the same object, so
            # any consumer that keeps a reference only sees the last card.
            item = MoviesItem()
            item['file_name'] = movie.xpath('./div[1]/span/text()').get()
            item['file_types'] = movie.xpath('./div[2]/text()')[-1].get().strip()
            item['file_date'] = movie.xpath('./div[4]/text()')[-1].get().strip()
            # Yield the records one at a time.
            yield item
Exemplo n.º 3
0
 def parse(self, response):
     """Handle one page of JSON search results and queue the next page.

     The response body is decoded as JSON and its ``'data'`` entry is read.
     When that entry is empty nothing is yielded, which ends the pagination.
     Otherwise each record's ``title``, ``rate`` and ``star`` are copied onto
     a ``MoviesItem`` and a request for the record's ``url`` is yielded with
     ``self.parse_detail`` as callback and a deep copy of the item in
     ``meta['item']``. Finally a request for the next result page is yielded;
     its ``start`` offset is the module-level ``index`` times 20, and
     ``index`` is incremented afterwards.
     """
     global index
     payload = json.loads(response.body.decode())
     movies = payload['data']
     if len(movies) <= 0:
         # Empty page: stop following the pagination.
         return

     for movie in movies:
         item = MoviesItem()
         for field in ('title', 'rate', 'star'):
             item[field] = movie[field]
         detail_url = movie['url']
         yield scrapy.Request(
             detail_url,
             callback=self.parse_detail,
             meta={'item': deepcopy(item)},
         )

     # Build the URL from the current counter value, then advance it.
     offset = index * 20
     next_url = 'https://movie.douban.com/j/new_search_subjects?sort=U&range=0,10&tags=%E7%94%B5%E5%BD%B1&start={}&year_range=2019,2019'.format(offset)
     index += 1
     yield scrapy.Request(next_url, callback=self.parse)
Exemplo n.º 4
0
    def parse(self, response):
        """Scrape the movie budget table rendered by ``self.driver``.

        The incoming ``response`` is used only for its URL: the page is
        loaded through ``self.driver`` and the rendered source is wrapped in
        a new ``TextResponse``. Every second table row, starting with the
        second one, is turned into a ``MoviesItem``.
        NOTE(review): presumably the skipped rows are separators - confirm
        against the page markup.

        Each item field holds the list returned by ``extract()`` for its
        cell, not a single string.

        Yields:
            MoviesItem: with ``RDate``, ``Title``, ``PBudget``, ``DomesticG``
            and ``WorldwideG``.
        """
        self.driver.get('http://www.the-numbers.com/movie/budgets/all')
        response = TextResponse(url=response.url, body=self.driver.page_source, encoding='utf-8')
        rows = response.xpath('//*[@id="page_filling_chart"]/center/table/tbody/tr').extract()

        # Walk the rows that are actually present rather than a fixed count:
        # a hard-coded upper bound raises IndexError on a shorter table and
        # silently ignores rows beyond it on a longer one.
        for row_html in rows[1::2]:
            # Parse the row markup once and query the resulting selector.
            row = Selector(text=row_html)
            release_date = row.xpath('//td[2]/a/text()').extract()
            title = row.xpath('//td[3]/b/a/text()').extract()
            production_budget = row.xpath('//td[4]/text()').extract()
            domestic_gross = row.xpath('//td[5]/text()').extract()
            worldwide_gross = row.xpath('//td[6]/text()').extract()

            # Single formatted string so the output is the same
            # space-separated line on both Python 2 and Python 3.
            print('%s %s %s %s %s' % (
                release_date, title, production_budget,
                domestic_gross, worldwide_gross))

            item = MoviesItem()
            item['RDate'] = release_date
            item['Title'] = title
            item['PBudget'] = production_budget
            item['DomesticG'] = domestic_gross
            item['WorldwideG'] = worldwide_gross

            yield item
Exemplo n.º 5
0
    def parse(self, response):
        """Yield items for at most the first ten movie cards on the page.

        A movie card is a ``<div class="movie-hover-info">``. The title is
        the first text result of ``./div/span[1]``; the type and the date are
        the second text node of the second and fourth child ``<div>``, each
        passed through ``self.process_data`` before being stored.

        Yields:
            MoviesItem: with ``title``, ``movie_type`` and ``movie_date``.

        Raises:
            IndexError: if a card lacks the title text or has fewer than two
                text nodes under its second or fourth child ``<div>``.
        """
        print("--------------")
        print(response.url)
        print("--------------")
        movie_div = Selector(response=response).xpath('//div[@class="movie-hover-info"]')
        # Slice up front instead of counting inside the loop, so an eleventh
        # card is never extracted (and cannot raise) just to be discarded.
        for tags in movie_div[:10]:
            title_element = tags.xpath('./div/span[1]/text()')
            movie_type_element = tags.xpath('./div[2]/text()')
            movie_date_element = tags.xpath('./div[4]/text()')

            item = MoviesItem()
            # Extract the movie title, type and release date respectively.
            item['title'] = title_element.extract()[0]
            item['movie_type'] = self.process_data(movie_type_element.extract()[1])
            item['movie_date'] = self.process_data(movie_date_element.extract()[1])
            yield item