class AppIdsImporter(object):
    """Base importer that extracts app ids from cached category pages.

    The constructor leaves ``market`` and ``category_ids`` as ``None``;
    presumably concrete subclasses assign them and implement
    :meth:`_parser` (TODO confirm against the subclasses).
    """

    def __init__(self):
        self.market = None
        self.category_ids = None
        self.database_service = DatabaseService()
        self.redis_service = RedisService()

    def imported(self, date_str, **kargs):
        """Import the ids of every configured category for ``date_str``.

        For each category the pages of all letters ``A``-``Z`` are loaded,
        parsed and merged into one set, which is then saved.

        :param date_str: date token substituted into the cache key
        :param kargs: accepted for interface compatibility; unused here
        """
        print('Started to import ids')
        logger.info('Started to import ids')
        try:
            for category_id in self.category_ids:
                import_ids_set = set()
                # ascii_uppercase is locale-independent and exists on both
                # Python 2 and 3 (string.uppercase is neither).
                for letter in string.ascii_uppercase:
                    for content in self._load(date_str, category_id, letter):
                        import_ids_set.update(self._parser(content))
                print(len(import_ids_set))
                self._save(import_ids_set)
                garbage_number = gc.collect()
                print('Garbage number: {}'.format(garbage_number))
        finally:
            # Close the database service even when a category fails midway.
            self.database_service.close()

    def _load(self, date_str, category_id, letter):
        """Yield the decompressed pages cached for one category/letter pair.

        :param date_str: date token substituted into the cache key
        :param category_id: category identifier substituted into the key
        :param letter: letter substituted into the key
        :return: generator of zlib-decompressed page contents
        """
        category_page_key = CATEGORY_PAGE_KEY.format(
            date=date_str,
            category_id=category_id,
            market=self.market,
            letter=letter,
        )
        print(category_page_key)
        for category_page in self.redis_service.members_set(category_page_key):
            yield zlib.decompress(category_page)

    @abstractmethod
    def _parser(self, content):
        """Extract ids from one decompressed page; must be overridden.

        :param content: decompressed page content
        :return: ids set
        """
        raise NotImplementedError()

    def _save(self, ids_set):
        """Persist ``ids_set`` for the current market; failures are logged.

        An empty or ``None`` set is ignored. Exceptions raised by the
        database service are logged and swallowed so the import can
        continue with the next category.

        :param ids_set: collection of ids to import
        """
        if not ids_set:
            return
        try:
            self.database_service.import_ids(self.market, ids_set)
        except Exception as ex:
            logger.exception(ex)
            print(ex)
            logger.error('Failed import ids {}'.format(len(ids_set)))
            print('Failed import ids.')
        else:
            print('Succeed import ids: {}'.format(len(ids_set)))
            logger.info('Succeed import ids: {}'.format(len(ids_set)))
class AppDetailImporter(object):
    """Base importer that refreshes stored app details from cached pages.

    The constructor leaves ``market`` as ``None``; presumably concrete
    subclasses assign it and implement :meth:`_parser` (TODO confirm
    against the subclasses).
    """

    def __init__(self):
        self.market = None
        self.database_service = DatabaseService()
        self.redis_service = RedisService()

    def imported(self, date_str, **kwargs):
        """Import details for the ids returned by the database service.

        :param date_str: date token substituted into the cache key
        :param kwargs: optional ``start`` (default ``1``) and ``end``
            (default ``-1``) values forwarded to ``load_ids``
        """
        start_id = kwargs.get('start', 1)
        end_id = kwargs.get('end', -1)
        try:
            ids = self.database_service.load_ids(self.market, start_id, end_id)
            for batch_app_ids in chunks(ids, DEFAULT_BATCH_SIZE):
                print('Started to import batch: {}'.format(len(batch_app_ids)))
                logger.info('Started to import batch: {}'.format(len(batch_app_ids)))
                for app_id in batch_app_ids:
                    content = self._load(date_str, app_id)
                    if content is None:
                        # Nothing cached for this id; there is nothing to parse.
                        continue
                    detail_dict = self._parser(content)
                    self._save(app_id, detail_dict)
                garbage_number = gc.collect()
                print('Garbage number: {}'.format(garbage_number))
        finally:
            # Close the database service even when a batch fails midway.
            self.database_service.close()

    def _load(self, date_str, app_id):
        """Return the decompressed detail page cached for ``app_id``.

        :param date_str: date token substituted into the cache key
        :param app_id: app identifier substituted into the cache key
        :return: zlib-decompressed content, or ``None`` when the cache
            returns nothing (or an empty value) for the key
        """
        app_detail_key = DETAIL_SOURCE_KEY.format(date=date_str, market=self.market, app_id=app_id)
        detail = self.redis_service.get(app_detail_key)
        if not detail:
            return None
        return zlib.decompress(detail)

    @abstractmethod
    def _parser(self, content):
        """Build a detail dict from one decompressed page; must be overridden.

        :param content: decompressed page content
        :return: detail dict
        """
        raise NotImplementedError()

    def _save(self, app_id, current_detail):
        """Store ``current_detail`` when it differs from the saved detail.

        An event is additionally recorded unless the previously stored
        name is the literal string ``'NULL'``. Any exception is logged and
        swallowed so the import can continue with the next app.

        :param app_id: identifier of the app being updated
        :param current_detail: freshly parsed detail dict; an empty or
            ``None`` value is ignored
        """
        if not current_detail:
            return
        try:
            last_detail = self.database_service.get_app_detail(self.market, app_id)
            if self._need_to_update(last_detail, current_detail):
                self.database_service.update_app_detail(self.market, app_id, current_detail)
                logger.info('Succeed save detail for {} in {}'.format(app_id, self.market))
                print('Succeed save detail for {} in {}'.format(app_id, self.market))
                if last_detail['name'] != 'NULL':
                    self.database_service.save_event(app_id, last_detail, current_detail)
                    logger.info('Succeed save event for {} in {}'.format(app_id, self.market))
                    print('Succeed save event for {} in {}'.format(app_id, self.market))
            # NOTE: icon-change events (via _need_to_update_icon and
            # database_service.save_icon_event) are currently disabled.
            else:
                logger.info('Not need to update for {} in {}'.format(app_id, self.market))
                print('Not need to update for {} in {}'.format(app_id, self.market))
        except Exception as ex:
            print(ex)
            logger.exception(ex)
            # Logged as an error, matching the failure path of AppIdsImporter._save.
            logger.error('Failed update detail and event for {} in {}'.format(app_id, self.market))
            print('Failed save detail and event for {} in {}'.format(app_id, self.market))

    @staticmethod
    def _need_to_update(last_detail_dict, current_detail_dict):
        """Return ``True`` when the ``'name'`` values of the two dicts differ.

        :param last_detail_dict: previously stored detail dict
        :param current_detail_dict: freshly parsed detail dict
        :return: bool
        """
        return last_detail_dict['name'] != current_detail_dict['name']

    @staticmethod
    def _need_to_update_icon(last_detail, current_detail_dict):
        """Return ``True`` when the ``'icon_url'`` values of the two dicts differ.

        :param last_detail: previously stored detail dict
        :param current_detail_dict: freshly parsed detail dict
        :return: bool
        """
        return last_detail['icon_url'] != current_detail_dict['icon_url']