Пример #1
0
        def parse_taoguba_author(command):
            u"""
            Build a ``SingleTask`` of kind ``taoguba_author`` from a Taoguba URL.

            The id is read from the ``article_id`` group of
            ``Match.taoguba_article(command)`` and is used as the author id of
            both the task and its book. All three spider URLs are set to
            ``command`` unchanged.

            :param command: first page of a Taoguba post
            :return: the populated task
            """
            # NOTE(review): the match result is used without a None check --
            # presumably the caller only dispatches here for matching commands.
            matched = Match.taoguba_article(command)
            article_id = matched.group('article_id')
            log_line = u"taoguba_article_id:" + str(article_id)
            Debug.logger.debug(log_line)

            kind = 'taoguba_author'
            task = SingleTask()
            task.author_id = article_id
            task.kind = kind

            spider = task.spider
            spider.href_article_list = command
            spider.href = command
            # TODO: article info
            spider.href_profile = command

            # SQL filter fragments keyed on the extracted id.
            creator_filter = 'creator_id = "{}"'.format(article_id)
            author_filter = 'author_id = "{}"'.format(article_id)

            book = task.book
            book.kind = kind
            book.sql.info_extra = creator_filter
            book.sql.article_extra = author_filter
            book.author_id = article_id
            return task
Пример #2
0
        def parse_sinablog_author(command):
            u"""
            Build a ``SingleTask`` of kind ``sinablog_author`` from a blog URL.

            The author id is read from the ``sinablog_people_id`` group of
            ``Match.sinablog_author(command)``; the article-list, home and
            profile URLs are derived from it.

            :param command: home page URL of a Sina blog author
            :return: the populated task
            """
            # NOTE(review): the match result is used without a None check --
            # presumably the caller only dispatches here for matching commands.
            matched = Match.sinablog_author(command)
            people_id = matched.group('sinablog_people_id')
            log_line = u"sinablog_people_id:" + str(people_id)
            Debug.logger.debug(log_line)

            kind = 'sinablog_author'
            task = SingleTask()
            task.author_id = people_id
            task.kind = kind

            # The three pages the spider visits, all built from the author id.
            article_list_url = 'http://blog.sina.com.cn/s/articlelist_{}_0_1.html'.format(people_id)
            home_url = 'http://blog.sina.com.cn/u/{}'.format(people_id)
            profile_url = 'http://blog.sina.com.cn/s/profile_{}.html'.format(people_id)

            spider = task.spider
            spider.href_article_list = article_list_url
            spider.href = home_url
            spider.href_profile = profile_url

            # SQL filter fragments keyed on the author id.
            creator_filter = 'creator_id = "{}"'.format(people_id)
            author_filter = 'author_id = "{}"'.format(people_id)

            book = task.book
            book.kind = kind
            book.sql.info_extra = creator_filter
            book.sql.article_extra = author_filter
            book.author_id = people_id
            return task
Пример #3
0
        def parse_csdnblog_author(command):
            u"""
            Build a ``SingleTask`` of kind ``csdnblog_author`` from a blog URL.

            The author id is read from the ``csdnblog_author_id`` group of
            ``Match.csdnblog_author(command)`` and the spider URL is rebuilt
            from it.

            :param command: homepage of someone, e.g. http://blog.csdn.net/elton_xiao
            :return: the populated task
            """
            # NOTE(review): the match result is used without a None check --
            # presumably the caller only dispatches here for matching commands.
            matched = Match.csdnblog_author(command)
            author_id = matched.group('csdnblog_author_id')

            kind = 'csdnblog_author'
            task = SingleTask()
            # The original author questioned whether the task-level id is needed.
            task.author_id = author_id
            task.kind = kind
            task.spider.href = 'http://blog.csdn.net/{}'.format(author_id)

            # SQL filter fragments keyed on the author id.
            creator_filter = 'creator_id = "{}"'.format(author_id)
            author_filter = 'author_id = "{}"'.format(author_id)

            book = task.book
            book.kind = kind
            book.sql.info_extra = creator_filter
            book.sql.article_extra = author_filter
            book.author_id = author_id
            return task
Пример #4
0
        def parse_jianshu_author(command):
            u"""
            Build a ``SingleTask`` of kind ``jianshu_author`` from a Jianshu URL.

            The user id is read from the ``jianshu_id`` group of
            ``Match.jianshu_author(command)`` and the spider URL is rebuilt
            from it.

            :param command: homepage of someone, e.g.
                http://www.jianshu.com/users/b1dd2b2c87a8/latest_articles
            :return: the populated task
            """
            # NOTE(review): the match result is used without a None check --
            # presumably the caller only dispatches here for matching commands.
            matched = Match.jianshu_author(command)
            user_id = matched.group('jianshu_id')

            kind = 'jianshu_author'
            task = SingleTask()
            task.author_id = user_id
            task.kind = kind
            latest_articles_url = 'http://www.jianshu.com/users/{}/latest_articles'.format(user_id)
            task.spider.href = latest_articles_url

            # SQL filter fragments keyed on the user id.
            creator_filter = 'creator_id = "{}"'.format(user_id)
            author_filter = 'author_id = "{}"'.format(user_id)

            book = task.book
            book.kind = kind
            book.sql.info_extra = creator_filter
            book.sql.article_extra = author_filter
            book.author_id = user_id
            return task
Пример #5
0
        def parse_generic(command):
            u"""
            Build a ``SingleTask`` by trying every matcher named in ``Type.type_list``.

            Each name in ``Type.type_list`` is looked up on ``Match`` and called
            with ``command``. On a truthy result the task's ``author_id`` is set
            from the ``subject_id`` group and its ``kind`` to that type name.
            When nothing matches, neither attribute is assigned here.

            The spider URL, the two SQL queries and the book's author id all
            use ``command`` itself rather than the extracted id.

            :param command: the command string to classify
            :return: the populated task
            """
            from .tools.type import Type

            task = SingleTask()
            for type_name in Type.type_list:
                matcher = getattr(Match, type_name)
                hit = matcher(command)
                if not hit:
                    continue
                # No break: a later matching type overwrites an earlier one.
                task.author_id = hit.group('subject_id')
                task.kind = type_name

            spider = task.spider
            spider.href = command

            info_query = 'select * from generic_info where creator_id = "{}"'.format(command)
            article_query = 'select * from generic_article where author_id = "{}"'.format(command)

            book = task.book
            book.kind = task.kind
            book.sql.info = info_query
            book.sql.answer = article_query
            # Read back from the spider, as the original does.
            book.author_id = spider.href
            return task
Пример #6
0
        def parse_sinablog_author(command):
            u"""
            Create the crawl task for a single Sina blog author.

            ``Match.sinablog_author(command)`` supplies the author id through
            its ``sinablog_people_id`` group. That id is logged, stored on the
            task and its book, and substituted into the article-list, home
            and profile URLs.

            :param command: home page URL of a Sina blog author
            :return: the ``SingleTask`` of kind ``sinablog_author``
            """
            # NOTE(review): no None check on the match -- presumably guaranteed
            # by the caller; confirm.
            author_id = Match.sinablog_author(command).group('sinablog_people_id')
            Debug.logger.debug(
                u"sinablog_people_id:" + str(author_id)
            )

            task = SingleTask()
            task.author_id = author_id
            task.kind = 'sinablog_author'

            # Spider entry points derived from the author id.
            list_template = 'http://blog.sina.com.cn/s/articlelist_{}_0_1.html'
            home_template = 'http://blog.sina.com.cn/u/{}'
            profile_template = 'http://blog.sina.com.cn/s/profile_{}.html'
            task.spider.href_article_list = list_template.format(author_id)
            task.spider.href = home_template.format(author_id)
            task.spider.href_profile = profile_template.format(author_id)

            # Book metadata and the SQL filters that select this author's rows.
            task.book.kind = 'sinablog_author'
            task.book.sql.info_extra = 'creator_id = "{}"'.format(
                author_id
            )
            task.book.sql.article_extra = 'author_id = "{}"'.format(
                author_id
            )
            task.book.author_id = author_id
            return task