Exemplo n.º 1
0
 def start_requests(self):
     """Seed the crawl with a single request for the Maoyan home page."""
     home_url = "https://maoyan.com/"
     # render=True is passed through to feapder.Request unchanged.
     yield feapder.Request(home_url, render=True)
Exemplo n.º 2
0
 def start_requests(self, task):
     """Turn one task record into a request.

     ``task`` is read positionally: element 0 is used as the task id and
     element 1 as the URL to fetch.
     """
     task_id, url = task[0], task[1]
     yield feapder.Request(url, task_id=task_id)
Exemplo n.º 3
0
 def start_requests(self):
     """Produce the initial task: one request for the Baidu home page."""
     seed_url = "https://www.baidu.com"
     yield feapder.Request(seed_url)
Exemplo n.º 4
0
 def start_requests(self, task):
     """Build the JD item-page request for a ``(task_id, item_id)`` task."""
     task_id, item_id = task
     url_template = "https://item.jd.com/{}.html"
     # The request carries the task_id field along with it.
     yield feapder.Request(url_template.format(item_id), task_id=task_id)
Exemplo n.º 5
0
 def start_requests(self, *args, **kws):
     """Print each index 0..199 and yield one Baidu request per index.

     The positional and keyword arguments are accepted but not used.
     """
     target_url = "https://www.baidu.com"
     for seq in range(200):
         print(seq)
         yield feapder.Request(target_url)
Exemplo n.º 6
0
 def start_requests(self):
     """Yield 100 Baidu requests with ``p`` running from 0 to 99.

     A progress line is printed before each request is yielded.
     """
     for i in range(100):
         page_url = f"https://www.baidu.com?p={i}"
         # The printed message reads "dispatching task <i>".
         print(f"下发任务 {i}")
         yield feapder.Request(page_url)
Exemplo n.º 7
0
 def start_requests(self):
     """Yield the single seed request for the Baidu home page."""
     # Plain literal: the URL has no placeholders, so no f-string is needed.
     yield feapder.Request("https://www.baidu.com")
Exemplo n.º 8
0
 def start_requests(self, task):
     """Yield a request for ``task.url`` tagged with ``task.id`` as its task id."""
     url, task_id = task.url, task.id
     yield feapder.Request(url, task_id=task_id)
Exemplo n.º 9
0
 def start_requests(self):
     """Yield one request per listing page; ``range(1, 2)`` covers page 1 only."""
     page_template = "https://www.qiushibaike.com/8hr/page/{}/"
     for page_num in range(1, 2):
         yield feapder.Request(page_template.format(page_num))
Exemplo n.º 10
0
 def start_requests(self):
     """Yield ten ``render=True`` requests for news.qq.com.

     The URLs differ only in their fragment, ``#0`` through ``#9``.
     """
     url_template = "https://news.qq.com/#{}"
     for fragment in range(10):
         yield feapder.Request(url_template.format(fragment), render=True)
Exemplo n.º 11
0
 def start_requests(self):
     """Yield the single seed request for the news.qq.com front page."""
     front_page = "https://news.qq.com/"
     yield feapder.Request(front_page)
Exemplo n.º 12
0
    def parse(self, request, response):
        """Extract movies from a listing page and schedule follow-up work.

        For every ``<dd>`` under the ``movie-list`` ``<dl>`` this yields one
        ``MaoyanHotMovieListItem`` followed by one ``MaoyanFilmDetailTaskItem``
        per entry in ``setting.BRAND_IDS``. When ``request.page`` is 1 it also
        yields a request for each remaining page (2 up to the number read from
        the pager) and finally the result of ``self.update_task_batch`` for
        the request's task id.
        """

        def first(node, expr):
            # First value matched by ``expr`` relative to ``node``.
            return node.xpath(expr).extract_first()

        def labelled(node, pattern):
            # First regex capture in ``node``, stripped; "" when nothing matches.
            return node.re_first(pattern, default="").strip()

        for node in response.xpath("//dl[@class='movie-list']//dd"):
            cover = first(
                node,
                './div[@class="movie-item film-channel"]//div[@class="movie-poster"]/img[2]/@data-src',
            )
            title = first(
                node, './div[@class="channel-detail movie-item-title"]/@title'
            )
            href = first(
                node, './div[@class="channel-detail movie-item-title"]/a/@href'
            )
            score = first(
                node, 'string(./div[@class="channel-detail channel-detail-orange"])'
            )
            movie_type = labelled(node, "类型:</span>(.*?)<")
            main_actor = labelled(node, "主演:</span>(.*?)<")
            release_date = labelled(node, "上映时间:</span>(.*?)<")
            imax = first(
                node,
                './div[@class="movie-item film-channel"]//div[@class="movie-ver"]/i/@class',
            )
            # The movie id is the last path segment of the detail link.
            movie_id = href.split("/")[-1]

            item = maoyan_hot_movie_list_item.MaoyanHotMovieListItem()
            field_values = {
                "name": title,
                "movie_id": movie_id,
                "cover": cover,
                "url": href,
                "score": score,
                "movie_type": movie_type,
                "main_actor": main_actor,
                "release_date": release_date,
                "imax": imax,
                "city_id": request.city_id,
                "crawl_time": tools.get_current_date(),
            }
            for field, value in field_values.items():
                setattr(item, field, value)
            yield item

            # Movie-detail tasks: one per configured brand.
            for brand_id in setting.BRAND_IDS:
                detail_task = (
                    maoyan_film_detail_task_item.MaoyanFilmDetailTaskItem())
                detail_task.movie_id = movie_id  # movie id
                detail_task.city_id = request.city_id  # city id
                detail_task.brand_id = brand_id  # brand; -1 means all
                detail_task.show_date = tools.get_current_date(
                    "%Y-%m-%d")  # date
                yield detail_task

        # Pagination is only fanned out from the first page.
        if request.page != 1:
            return

        last_page_text = response.xpath(
            '//ul[@class="list-pager"]//li[last()-1]/a/text()'
        ).extract_first()
        if last_page_text:
            for page in range(2, int(last_page_text) + 1):
                yield feapder.Request(page=page,
                                      city_id=request.city_id,
                                      task_id=request.task_id)

        # Update the task.
        yield self.update_task_batch(request.task_id, 1)
Exemplo n.º 13
0
 def start_requests(self, task):
     """Yield the page-1 request for a ``(task_id, city_id)`` task.

     No URL is passed here; only ``city_id``, ``page`` and ``task_id`` are
     attached to the request.
     """
     task_id, city_id = task
     first_page = feapder.Request(city_id=city_id, page=1, task_id=task_id)
     yield first_page
Exemplo n.º 14
0
 def start_requests(self, task):
     """Yield a ``render=True`` request for a ``(task_id, url)`` task."""
     task_id, url = task
     rendered = feapder.Request(url, task_id=task_id, render=True)
     yield rendered
Exemplo n.º 15
0
 def start_requests(self):
     """Yield the seed request for the Sina news front page.

     Note: this inherits from BaseParser, not Spider.
     """
     front_page = "https://news.sina.com.cn/"
     yield feapder.Request(front_page)
Exemplo n.º 16
0
 def start_requests(self, *args, **kws):
     """Yield a single Baidu request; the extra arguments are not used."""
     seed_url = "https://www.baidu.com"
     yield feapder.Request(seed_url)
Exemplo n.º 17
0
 def start_requests(self):
     """Yield one Baidu request (fragment ``#0``) with ``self.parse`` as callback."""
     for i in range(1):
         url = f"https://www.baidu.com#{i}"
         yield feapder.Request(url, callback=self.parse)
Exemplo n.º 18
0
 def start_requests(self):
     """Yield the tophub.today request, passing ``self.download_midware`` along."""
     midware = self.download_midware
     yield feapder.Request("https://tophub.today/", download_midware=midware)
Exemplo n.º 19
0
 def start_requests(self):
     """Yield 100 Baidu requests whose URLs differ only by fragment ``#0``..``#99``."""
     url_template = "https://www.baidu.com#{}"
     yield from (
         feapder.Request(url_template.format(n)) for n in range(100)
     )