class UserPartialSyncer:
    """Incrementally sync users from the bgm.tv API into the database.

    The scrape starts at the highest user id already stored and runs up to
    the max id exposed by the request handler.
    """

    def __init__(self):
        # NOTE(review): 435000 and 9 are forwarded positionally; their meaning
        # is defined by DataSyncer, which is not visible here -- confirm there.
        self.dataSyncer = DataSyncer(
            'https://api.bgm.tv/user/',
            User,
            435000,
            9,
        )

    def calculate_incremental_scraping_range(self):
        """Return ``(start_id, api_max_id)`` for the incremental scrape.

        ``start_id`` is the largest ``User.id`` found in the database,
        clamped to at least 1 (an empty table therefore yields 1).
        ``api_max_id`` is ``self.dataSyncer.requestHandler.max_id``.
        """
        session = self.dataSyncer.databaseExecutor.session
        # Ordering by id descending and taking the first row gives the user
        # with the maximum id, or None when the table is empty.
        newest_user = session.query(User).order_by(User.id.desc()).first()

        if newest_user is None:
            highest_stored_id = 0
        else:
            highest_stored_id = newest_user.id

        return max(1, highest_stored_id), self.dataSyncer.requestHandler.max_id

    def run(self):
        """Start the scraper if the API max id exceeds the database max id.

        When the database is already at (or beyond) the API max id, only a
        log line is emitted.
        """
        db_max, api_max = self.calculate_incremental_scraping_range()

        if db_max < api_max:
            logger.info(
                'Current max user id:%s in database is smaller than max id:%s in API, starting syncing data from'
                ' %s to %s',
                db_max, api_max, db_max, api_max)
            # The scrape starts at db_max itself and passes api_max + 1 as the
            # end -- presumably the end bound is exclusive; confirm against
            # DataSyncer.start_scraper.
            self.dataSyncer.start_scraper(db_max, api_max + 1)
            return

        logger.info(
            "Nothing to sync as there's no new user. Current max id in API :%s, max id in database: :%s",
            api_max, db_max)
class SubjectFullSyncer:
    """Run a full subject sync, from id 1 up to the request handler's max id."""

    def __init__(self):
        # NOTE(review): 258000 and 9 are forwarded positionally; their meaning
        # is defined by DataSyncer, which is not visible here -- confirm there.
        self.dataSyncer = DataSyncer(
            'https://api.bgm.tv/subject/',
            Subject,
            258000,
            9,
            '?responseGroup=medium',
        )

    def run(self):
        """Log the id range and start the scraper over that range."""
        syncer = self.dataSyncer
        logger.info(
            'Starting a full sync from API to database in range(%s, %s)',
            1, syncer.requestHandler.max_id)
        # NOTE(review): UserPartialSyncer.run passes max_id + 1 as the end
        # argument while this passes max_id; if the end bound is exclusive the
        # last id is not scraped here -- confirm that this is intended.
        syncer.start_scraper(1, syncer.requestHandler.max_id)
class UserFullSyncer:
    """Run a full user sync, from id 1 up to the request handler's max id."""

    def __init__(self):
        # NOTE(review): 430000 and 9 are forwarded positionally; their meaning
        # is defined by DataSyncer, which is not visible here -- confirm there.
        self.dataSyncer = DataSyncer(
            'https://api.bgm.tv/user/',
            User,
            430000,
            9,
        )

    def run(self):
        """Log the id range and start the scraper over that range."""
        syncer = self.dataSyncer
        logger.info(
            'Starting a full sync from API to database in range(%s, %s)',
            1, syncer.requestHandler.max_id)
        # NOTE(review): UserPartialSyncer.run passes max_id + 1 as the end
        # argument while this passes max_id; if the end bound is exclusive the
        # last id is not scraped here -- confirm that this is intended.
        syncer.start_scraper(1, syncer.requestHandler.max_id)
def __init__(self):
    """Point Scrapy at the project settings and build the crawler and syncer."""
    # The module path is written as seen from the project root (i.e. from
    # main.py). setdefault leaves an already-set SCRAPY_SETTINGS_MODULE alone.
    os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'record.bgm.settings')

    project_settings = get_project_settings()
    self.process = CrawlerProcess(project_settings)
    # The spider class itself is stored, not an instance.
    self.spiders = RecordSpider
    # NOTE(review): 435000 and 8 are forwarded positionally; their meaning is
    # defined by DataSyncer, which is not visible here -- confirm there.
    self.dataSyncer = DataSyncer(
        'https://api.bgm.tv/user/',
        User,
        435000,
        8,
    )
def __init__(self):
    """Create the DataSyncer for the bgm.tv subject URL and the Subject class."""
    endpoint = 'https://api.bgm.tv/subject/'
    query_suffix = '?responseGroup=medium'
    # NOTE(review): 258000 and 9 are forwarded positionally; their meaning is
    # defined by DataSyncer, which is not visible here -- confirm there.
    self.dataSyncer = DataSyncer(endpoint, Subject, 258000, 9, query_suffix)
def __init__(self):
    """Create the DataSyncer for the bgm.tv user URL and the User class."""
    endpoint = 'https://api.bgm.tv/user/'
    # NOTE(review): 435000 and 9 are forwarded positionally; their meaning is
    # defined by DataSyncer, which is not visible here -- confirm there.
    self.dataSyncer = DataSyncer(endpoint, User, 435000, 9)