Пример #1
0
class UserPartialSyncer:
    """
    Incremental user sync: only the id window between the newest user already
    stored in the database and the API's maximum id is scraped.
    """
    def __init__(self):
        # NOTE(review): the meaning of 435000 and 9 is defined by DataSyncer,
        # which is not visible here -- confirm against its signature.
        user_endpoint = 'https://api.bgm.tv/user/'
        self.dataSyncer = DataSyncer(user_endpoint, User, 435000, 9)

    def calculate_incremental_scraping_range(self):
        """
        Work out the id window for an incremental sync.

        :return: tuple ``(lower, upper)``; ``lower`` is the highest user id
            found in the database (clamped to at least 1), ``upper`` is the
            request handler's ``max_id``.
        """
        session = self.dataSyncer.databaseExecutor.session
        newest_user = session.query(User).order_by(User.id.desc()).first()

        # no row found -> fall back to 0, which the clamp below turns into 1
        if newest_user is None:
            highest_stored_id = 0
        else:
            highest_stored_id = newest_user.id

        lower_bound = max(1, highest_stored_id)
        return lower_bound, self.dataSyncer.requestHandler.max_id

    def run(self):
        """
        Start the scraper over the incremental window, or just log that the
        database is already up to date.
        """
        db_max, api_max = self.calculate_incremental_scraping_range()

        if not (db_max < api_max):
            logger.info(
                'Nothing to sync as there\'s no new user. Current max id in API :%s, max id in database: :%s',
                api_max, db_max)
            return

        logger.info(
            'Current max user id:%s in database is smaller than max id:%s in API, starting syncing data from'
            ' %s to %s', db_max, api_max, db_max, api_max)
        # the end bound handed to the scraper is one past the API maximum
        self.dataSyncer.start_scraper(db_max, api_max + 1)
Пример #2
0
class SubjectFullSyncer:
    """
    Full subject sync: scrape every subject from id 1 up to the request
    handler's max_id.
    """
    def __init__(self):
        subject_endpoint = 'https://api.bgm.tv/subject/'
        response_group_query = '?responseGroup=medium'
        self.dataSyncer = DataSyncer(subject_endpoint, Subject, 258000, 9,
                                     response_group_query)

    def run(self):
        """
        Log the range about to be synced, then start the scraper over it.
        """
        first_id = 1
        syncer = self.dataSyncer
        logger.info(
            'Starting a full sync from API to database in range(%s, %s)',
            first_id, syncer.requestHandler.max_id)
        # NOTE(review): max_id itself is passed as the end bound; confirm
        # whether start_scraper treats that bound as inclusive.
        syncer.start_scraper(first_id, syncer.requestHandler.max_id)
Пример #3
0
class UserFullSyncer:
    """
    Full user sync: scrape every user from id 1 up to the request handler's
    max_id.
    """
    def __init__(self):
        user_endpoint = 'https://api.bgm.tv/user/'
        self.dataSyncer = DataSyncer(user_endpoint, User, 430000, 9)

    def run(self):
        """
        Log the range about to be synced, then start the scraper over it.
        """
        first_id = 1
        syncer = self.dataSyncer
        logger.info(
            'Starting a full sync from API to database in range(%s, %s)',
            first_id, syncer.requestHandler.max_id)
        # NOTE(review): max_id itself is passed as the end bound; confirm
        # whether start_scraper treats that bound as inclusive.
        syncer.start_scraper(first_id, syncer.requestHandler.max_id)
Пример #4
0
 def __init__(self):
     """
     Point Scrapy at the project's settings module, create the crawler
     process, and build the user data syncer.
     """
     # Settings module path as seen from the project root, i.e. from main.py.
     # setdefault leaves an already-exported SCRAPY_SETTINGS_MODULE untouched.
     os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'record.bgm.settings')

     project_settings = get_project_settings()
     self.process = CrawlerProcess(project_settings)
     self.spiders = RecordSpider

     user_endpoint = 'https://api.bgm.tv/user/'
     self.dataSyncer = DataSyncer(user_endpoint, User, 435000, 8)
Пример #5
0
 def __init__(self):
     """
     Build the data syncer for the subject endpoint.
     """
     subject_endpoint = 'https://api.bgm.tv/subject/'
     response_group_query = '?responseGroup=medium'
     self.dataSyncer = DataSyncer(subject_endpoint, Subject, 258000, 9,
                                  response_group_query)
Пример #6
0
 def __init__(self):
     """
     Build the data syncer for the user endpoint.
     """
     user_endpoint = 'https://api.bgm.tv/user/'
     self.dataSyncer = DataSyncer(user_endpoint, User, 435000, 9)