示例#1
0
 def run(self, workflow_input):
     """Run every processor in order and return the workflow's result.

     Each processor in ``self._processors`` is called as
     ``processor.run(workflow_input, tmp_result, workflow_output)``. Both
     dicts start out empty and are shared by all processors of this run.

     Args:
         workflow_input: Value passed unchanged to every processor.

     Returns:
         The value stored under the ``'result'`` key of the output dict.

     Raises:
         AssertionError: If no ``'result'`` entry is present in the output
             dict after all processors have run.
     """
     workflow_output = {}
     tmp_result = {}
     for processor in self._processors:
         processor_name = processor.__class__.__name__
         logger.info('start to run processor: {}'.format(processor_name))
         processor.run(workflow_input, tmp_result, workflow_output)
         logger.info('processor: {} run finish'.format(processor_name))
     # Raise explicitly rather than via ``assert`` so the check is not
     # stripped when Python runs with -O; the exception type is unchanged.
     if 'result' not in workflow_output:
         raise AssertionError('can not find result in workflow_output!')
     return workflow_output['result']
    def run(self, workflow_input, tmp_result, workflow_output):
        """Fetch, filter, merge and score focuses for every configured user.

        For each ``music_type`` in ``users_config`` the focuses of all its
        users are fetched through ``weibo_api``. Each user gets up to four
        attempts: the first is made with ``use_cache=True``, retries with
        ``use_cache=False``. A user whose attempts all fail is skipped after
        the errors have been logged.

        The fetched focuses are then filtered against
        ``blacklist_config['all'] + blacklist_config[music_type]`` and merged
        by ``title``: the first focus with a given title is kept and the
        ``related_users`` of later ones are folded into it.

        Writes ``tmp_result['focuses']`` (music_type -> list of merged
        focuses) and ``tmp_result['scores']`` (music_type -> list of each
        focus's ``recent_read``). ``workflow_input`` and ``workflow_output``
        are not used here.
        """
        max_retries = 3
        focuses = {}

        # Fetch phase followed by clean-up phase, one music type at a time.
        for music_type, users in users_config.items():
            fetched = []
            focuses[music_type] = fetched
            for user_id, user_name in users:
                for attempt in range(max_retries + 1):
                    if attempt > 0:
                        logger.info(
                            'start to retry, current retry time is: {}'.format(
                                attempt))
                    try:
                        # Only the very first attempt may use the cache.
                        use_cache = attempt == 0
                        user = weibo_api.get_user_info(
                            user_id, user_name, use_cache)
                        user_focuses = weibo_api.get_focuses_by_user(
                            user, use_cache)
                        fetched.extend(user_focuses)
                        logger.info(
                            'fetch user: {} data success'.format(user_id))
                        break
                    except Exception as e:
                        logger.exception(
                            'fetch user: {} data error! {}'.format(user_id, e))
                        time.sleep(1)
                # Pause between users regardless of the outcome.
                time.sleep(1)

            # Drop blacklisted titles and merge focuses sharing a title.
            black_list = blacklist_config['all'] + blacklist_config[music_type]
            merged = {}
            for focus in fetched:
                title = focus.title
                if title in black_list:
                    continue
                if title in merged:
                    merged[title].related_users.extend(focus.related_users)
                else:
                    merged[title] = focus
                # Remove duplicate related users on the kept focus.
                kept = merged[title]
                kept.related_users = list(set(kept.related_users))

            focuses[music_type] = list(merged.values())

        # One score per focus, in the same order as the focus lists.
        scores = {}
        for music_type, type_focuses in focuses.items():
            scores[music_type] = [f.recent_read for f in type_focuses]

        tmp_result['focuses'] = focuses
        tmp_result['scores'] = scores
示例#3
0
 def start(self):
     """Run all registered workflows forever, pausing between passes.

     On each pass every entry of ``self._workflows`` is run with the entry
     of ``self._workflow_inputs`` at the same index. An exception raised
     while running a workflow is logged and does not stop the remaining
     workflows. After each pass ``self._interval`` is passed to
     ``time.sleep`` and the next pass begins; the method never returns.
     """
     while True:
         for position, workflow in enumerate(self._workflows):
             # Index lookup (not zip) so a missing input still raises.
             workflow_input = self._workflow_inputs[position]
             workflow_name = workflow.__class__.__name__
             try:
                 logger.info(
                     'start to run workflow: {}'.format(workflow_name))
                 result = workflow.run(workflow_input)
                 logger.info('workflow: {} run finish, result: {}'.format(
                     workflow_name, result))
             except Exception as e:
                 logger.exception('error for workflow: {}, {}'.format(
                     workflow_name, e))
         time.sleep(self._interval)
示例#4
0
 def run(self, workflow_input, tmp_result, workflow_output):
     """Collect recent posts of every configured user and screenshot them.

     For each ``music_type`` in ``users_config`` and each of its users, the
     user's posts are fetched through ``weibo_api`` and the post elements of
     the user's page are located through ``firefox_api``. Page elements are
     paired with the fetched posts by index. A post is kept when its
     ``time`` is later than now minus ``self._before_data`` days; each kept
     post is screenshotted to ``<self._images_dir>/<post.id>.png`` and its
     ``image_path`` is set to ``<post.id>.png``.

     Each user gets up to four attempts: the first is made with
     ``use_cache=True``, retries with ``use_cache=False``. A user's posts
     are added only once a whole attempt succeeds.

     Writes ``tmp_result['posts']`` (music_type -> list of kept posts).
     ``workflow_input`` and ``workflow_output`` are not used here.
     """
     max_retries = 3
     posts = {}
     for music_type, users in users_config.items():
         type_posts = posts[music_type] = []
         for user_id, user_name in users:
             for attempt in range(max_retries + 1):
                 if attempt > 0:
                     logger.info(
                         'start to retry, current retry time is: {}'.format(
                             attempt))
                 try:
                     # Only the very first attempt may use the cache.
                     use_cache = attempt == 0
                     # Fetch the current user's info and their posts.
                     user = weibo_api.get_user_info(
                         user_id, user_name, use_cache)
                     user_posts = weibo_api.get_posts_by_user(
                         user, use_cache)
                     # Filter out old posts and screenshot the rest.
                     fresh_posts = []  # this user's new posts
                     page_url = USER_POSTS_URL_FORMATTER.format(user.id)
                     post_elements = firefox_api.find_elements_in_page(
                         page_url, POSTS_CSS_SELECTOR)
                     for index, post_element in enumerate(post_elements):
                         # Page element without a matching fetched post.
                         if index >= len(user_posts):
                             continue
                         post = user_posts[index]
                         # Skip posts older than the configured window.
                         if post.time <= datetime.now() - timedelta(
                                 days=self._before_data):
                             continue
                         image_path = '{}/{}.png'.format(
                             self._images_dir, post.id)
                         post.image_path = '{}.png'.format(post.id)
                         firefox_api.screenshot(post_element, image_path)
                         fresh_posts.append(post)
                     type_posts.extend(fresh_posts)
                     logger.info(
                         'fetch user: {} data success'.format(user_id))
                     break
                 except Exception as e:
                     logger.exception(
                         'fetch user: {} data error! {}'.format(user_id, e))
                     time.sleep(1)
             # Pause between users regardless of the outcome.
             time.sleep(1)
     tmp_result['posts'] = posts
 def run(self, workflow_input, tmp_result, workflow_output):
     """Collect recent, unique videos of every configured user.

     For each ``music_type`` in ``users_config`` and each of its users, the
     user's videos are fetched through ``weibo_api``. A video is kept when
     its ``id`` has not been seen yet for this music type and its ``time``
     is later than now minus ``self._before_data`` days. For each kept
     video the cover is downloaded with ``_download_img`` to
     ``<self._image_dir>/<video.id>.jpg`` and ``video.cover_path`` is
     replaced by ``<video.id>.jpg``.

     Each user gets up to four attempts: the first is made with
     ``use_cache=True``, retries with ``use_cache=False``. Videos are
     recorded as they are processed, so a failure part-way through keeps
     the earlier ones and a retry skips them by id.

     Writes ``tmp_result['videos']`` (music_type -> list of kept videos)
     and ``tmp_result['scores']`` (music_type -> list of each kept video's
     ``view_cnt``). ``workflow_input`` and ``workflow_output`` are not
     used here.
     """
     max_retries = 3
     videos = {}
     scores = {}
     for music_type, users in users_config.items():
         type_videos = videos[music_type] = []
         type_scores = scores[music_type] = []
         seen_ids = set()  # de-duplicates videos within this music type
         for user_id, user_name in users:
             for attempt in range(max_retries + 1):
                 if attempt > 0:
                     logger.info(
                         'start to retry, current retry time is: {}'.format(
                             attempt))
                 try:
                     # Only the very first attempt may use the cache.
                     use_cache = attempt == 0
                     user = weibo_api.get_user_info(
                         user_id, user_name, use_cache)
                     user_videos = weibo_api.get_videos_by_user(
                         user, use_cache)
                     for video in user_videos:
                         if video.id in seen_ids:
                             continue
                         # Skip videos older than the configured window.
                         if video.time <= datetime.now() - timedelta(
                                 days=self._before_data):
                             continue
                         cover_f_name = '{}.jpg'.format(video.id)
                         cover_target = '{}/{}'.format(
                             self._image_dir, cover_f_name)
                         _download_img(video.cover_path, cover_target)
                         video.cover_path = cover_f_name
                         type_videos.append(video)
                         type_scores.append(video.view_cnt)
                         seen_ids.add(video.id)
                     logger.info(
                         'fetch user: {} data success'.format(user_id))
                     break
                 except Exception as e:
                     logger.exception(
                         'fetch user: {} data error! {}'.format(user_id, e))
                     time.sleep(1)
             # Pause between users regardless of the outcome.
             time.sleep(1)
     tmp_result['videos'] = videos
     tmp_result['scores'] = scores