Пример #1
0
 def start_requests(self):
     """Yield paged requests for every question stored in the database.

     After attaching the database to this object, all ``db.Question`` rows
     are loaded and, for each one, a request is generated per offset
     ``0, 20, 40, ...`` up to (but not including) the smaller of the
     question's ``answer_count`` and 100.  Each URL is built by
     %-formatting ``self.url_pattern`` with ``(question.id, offset)`` and
     carries ``self.authorization`` in the ``Authorization`` header.

     Yields:
         scrapy.Request: one request per (question, offset) pair.
     """
     db.attach(self)
     questions = self.session.query(db.Question).all()
     for question in questions:
         # Offsets never reach 100, however many answers are recorded.
         offset_limit = min(int(question.answer_count), 100)
         for offset in range(0, offset_limit, 20):
             target_url = self.url_pattern % (question.id, offset)
             # Read the token per request: this is a generator, so the
             # attribute may be consulted long after the first yield.
             auth_headers = {'Authorization': self.authorization}
             yield scrapy.Request(target_url, headers=auth_headers)
Пример #2
0
 def start_requests(self):
     """Yield one unfiltered request per artist stored in the database.

     After attaching the database to this object, every ``db.Artist`` row
     is loaded.  For each artist the URL is ``self.url_pattern``
     %-formatted with ``artist.artist_name``; the request carries
     ``self.authorization`` in the ``Authorization`` header, is created
     with ``dont_filter=True``, and exposes the artist row to the callback
     under ``meta['artist']``.

     Yields:
         scrapy.Request: one request per artist.
     """
     db.attach(self)
     artists = self.session.query(db.Artist).all()
     for artist in artists:
         yield scrapy.Request(
             self.url_pattern % artist.artist_name,
             headers={'Authorization': self.authorization},
             # Same resulting ``request.meta`` as assigning the key after
             # construction.
             meta={'artist': artist},
             dont_filter=True,
         )
Пример #3
0
 def start_requests(self):
     """Yield paged follower requests for every topic in the database.

     After attaching the database to this object, all ``db.Topic`` rows
     are loaded.  For each topic a request is generated per offset
     ``0, 20, 40, ...`` below ``int(topic.followers_count)``, targeting
     the topic followers endpoint with ``limit=20``.  Each request carries
     ``self.authorization`` in the ``Authorization`` header and exposes
     the topic row to the callback under ``meta['topic']``.

     Yields:
         scrapy.Request: one request per (topic, offset) pair.
     """
     db.attach(self)
     topics = self.session.query(db.Topic).all()
     for topic in topics:
         follower_total = int(topic.followers_count)
         for offset in range(0, follower_total, 20):
             followers_url = (
                 'https://www.zhihu.com/api/v4/topics/%s/followers?limit=20&offset=%d'
                 % (topic.id, offset))
             yield scrapy.Request(
                 followers_url,
                 headers={'Authorization': self.authorization},
                 meta={'topic': topic})
Пример #4
0
 def start_requests(self):
     """Yield a request for each channel user whose profile is complete.

     After attaching the database to this object, the ``db.User`` rows
     whose ``channel`` equals ``db.CHANNEL`` are loaded.  A user is
     requested only when ``business``, ``educations`` and ``employments``
     are all not ``None``; the URL is ``self.url_pattern`` %-formatted
     with ``user.user_id`` and the request carries ``self.authorization``
     in the ``Authorization`` header.

     Yields:
         scrapy.Request: one request per qualifying user.
     """
     db.attach(self)
     channel_users = (self.session.query(db.User)
                      .filter(db.User.channel == db.CHANNEL)
                      .all())
     for user in channel_users:
         # All three attributes are read up front, before any test.
         profile_fields = (user.business, user.educations,
                           user.employments)
         if not all(field is not None for field in profile_fields):
             continue
         target_url = self.url_pattern % user.user_id
         yield scrapy.Request(
             target_url, headers={'Authorization': self.authorization})
Пример #5
0
 def open_spider(self, spider):
     """Attach the database to this object via ``db.attach``.

     NOTE(review): presumably the Scrapy ``open_spider`` hook of a
     pipeline/extension -- confirm against the enclosing class.

     Args:
         spider: not used by the lines visible here.
     """
     db.attach(self)