Python DataClient.get_course_content示例，geektime_dl.data_client.DataClient.get_course_content Python示例

示例#1

0

显示文件

    def run(self, args):
        """Download the videos of a course, or only write out their URLs.

        Args:
            args: Command-line style argument list. ``args[0]`` is the
                course id. The remaining items may contain ``--url-only``
                (write a text file of URLs instead of downloading),
                ``--hd-only`` (prefer the ``hd`` URL, falling back to
                ``sd``) and ``--out-dir=<path>`` (default ``./mp4``).

        Raises:
            Exception: If the course's ``column_type`` is not 3.
        """
        course_id = args[0]
        options = args[1:]
        url_only = '--url-only' in options
        hd_only = '--hd-only' in options
        for arg in options:
            if '--out-dir=' in arg:
                out_dir = arg.split('--out-dir=')[1] or './mp4'
                break
        else:
            out_dir = './mp4'
        os.makedirs(out_dir, exist_ok=True)

        dc = DataClient()

        course_data = dc.get_course_intro(course_id)

        if int(course_data['column_type']) != 3:
            raise Exception('该课程不是视频课程:%s' % course_data['column_title'])

        # Everything for this course goes into a sub-directory named after it.
        out_dir = os.path.join(out_dir, course_data['column_title'])
        os.makedirs(out_dir, exist_ok=True)

        data = dc.get_course_content(course_id)

        if url_only:
            entries = []
            for post in data:
                # Parse the media description once per post.
                media = json.loads(post['video_media'])
                entries.append("{}:\n{}\n{}\n\n".format(
                    post['article_title'],
                    media.get('hd', {}).get('url'),
                    media.get('sd', {}).get('url')))
            url_file = os.path.join(
                out_dir, '%s.mp4.txt' % course_data['column_title'])
            # Titles are Chinese text; do not depend on the locale encoding.
            with open(url_file, 'w', encoding='utf-8') as f:
                f.write('\n'.join(entries))
            print("download mp4 url done: " + course_data['column_title'])
            return

        for post in data:
            file_name = format_path(post['article_title'] +
                                    ('.hd' if hd_only else '.sd'))
            if os.path.isfile(os.path.join(out_dir, file_name) + '.ts'):
                print(file_name + ' exists')
                continue
            media = json.loads(post['video_media'])
            sd_url = media.get('sd', {}).get('url')
            if hd_only:  # some post has sd mp4 only
                url = media.get('hd', {}).get('url') or sd_url
            else:
                url = sd_url
            if not url:
                # Nothing to fetch; do not hand None to the downloader.
                print('no video url, skipped: ' + file_name)
                continue
            dl = Downloader(3)
            dl.run(url, dir=out_dir, file_name=file_name)
            print('download mp4 done: ' + file_name)

示例#2

0

显示文件

    def run(self, args):
        """Render a text course to source files and, optionally, a mobi.

        Args:
            args: Command-line style argument list. ``args[0]`` is the
                course id. The remaining items may contain
                ``--out-dir=<path>`` (default ``./ebook``), ``--force``
                (forwarded to the data client and the source renderer),
                ``--enable-comments`` (append rendered comments to each
                article), ``--comment-count=<n>`` (default 10) and
                ``--source-only`` (skip the mobi step).

        Raises:
            Exception: If the course's ``column_type`` is not 1 or 2.
            ValueError: If ``--comment-count`` is not an integer.
        """
        course_id = args[0]
        options = args[1:]
        for arg in options:
            if '--out-dir=' in arg:
                out_dir = arg.split('--out-dir=')[1] or './ebook'
                break
        else:
            out_dir = './ebook'

        force = '--force' in options
        enable_comments = '--enable-comments' in options
        source_only = '--source-only' in options

        for arg in options:
            if '--comment-count=' in arg:
                # Normalise to int so the value has the same type as the
                # default instead of being a str only when user-supplied.
                comment_count = int(arg.split('--comment-count=')[1] or 10)
                break
        else:
            comment_count = 10

        os.makedirs(out_dir, exist_ok=True)

        dc = DataClient()

        course_data = dc.get_course_intro(course_id)

        if int(course_data['column_type']) not in (1, 2):
            raise Exception('该课程不提供文本:%s' % course_data['column_title'])

        # data
        data = dc.get_course_content(course_id, force=force)

        if enable_comments:
            for post in data:
                post['article_content'] += self._render_comment_html(
                    post['comments'], comment_count)

        # source file
        course_data['column_title'] = maker.format_file_name(
            course_data['column_title'])
        self.render_column_source_files(course_data,
                                        data,
                                        out_dir,
                                        force=force)

        # ebook
        if source_only:
            return
        title = self._title(course_data)
        # Skip the build when update_frequency is '全集' and the mobi
        # file is already present in out_dir.
        if course_data['update_frequency'] == '全集' and os.path.isfile(
                os.path.join(out_dir, title) + '.mobi'):
            print("{} exists ".format(title))
        else:
            make_mobi(source_dir=os.path.join(out_dir,
                                              course_data['column_title']),
                      output_dir=out_dir)

示例#3

0

显示文件

文件： mp3.py 项目： zoeminghong/geektime_dl

    def run(self, args):
        """Download the audio of a course, or only write out the URLs.

        Args:
            args: Command-line style argument list. ``args[0]`` is the
                course id. The remaining items may contain ``--url-only``
                (write a text file of URLs instead of downloading) and
                ``--out-dir=<path>`` (default ``./mp3``).

        Raises:
            Exception: If the course's ``column_type`` is not 1.
        """
        course_id = args[0]
        options = args[1:]
        url_only = '--url-only' in options
        for arg in options:
            if '--out-dir=' in arg:
                out_dir = arg.split('--out-dir=')[1] or './mp3'
                break
        else:
            out_dir = './mp3'
        os.makedirs(out_dir, exist_ok=True)

        dc = DataClient()
        course_data = dc.get_course_intro(course_id)
        if int(course_data['column_type']) != 1:
            raise Exception('该课程不提供音频:%s' % course_data['column_title'])

        # Everything for this course goes into a sub-directory named after it.
        out_dir = os.path.join(out_dir, course_data['column_title'])
        os.makedirs(out_dir, exist_ok=True)

        data = dc.get_course_content(course_id)

        if url_only:
            url_file = os.path.join(
                out_dir, '%s.mp3.txt' % course_data['column_title'])
            # Titles are Chinese text; do not depend on the locale encoding.
            with open(url_file, 'w', encoding='utf-8') as f:
                # TODO alignment
                f.write('\n'.join(
                    "{}:\t\t{}".format(post['article_title'],
                                       post['audio_download_url'])
                    for post in data))

            return

        dl = Downloader()
        for post in data:
            file_name = format_path(post['article_title'] + '.mp3')
            if os.path.isfile(os.path.join(out_dir, file_name)):
                print(file_name + ' exists')
                continue
            # Posts without an audio URL are silently skipped.
            if post['audio_download_url']:
                dl.run(post['audio_download_url'],
                       out_file=file_name,
                       out_dir=out_dir)
                print('download mp3 done: ' + file_name)

示例#4

0

显示文件

文件： ebook.py 项目： Jacky-Chang/geektime_dl

    def run(self, args):
        """Render a text course to source files.

        Args:
            args: Command-line style argument list. ``args[0]`` is the
                course id. The remaining items may contain
                ``--out-dir=<path>`` (default ``./ebook``), ``--force``
                (forwarded to the data client and the source renderer),
                ``--enable-comments`` (append rendered comments to each
                article) and ``--comment-count=<n>`` (default 10).
                ``--source-only`` and ``--push`` are parsed but not used
                in the visible part of this method.

        Raises:
            Exception: If the course's ``column_type`` is not 1 or 2.
            ValueError: If ``--comment-count`` is not an integer.
        """
        course_id = args[0]
        options = args[1:]
        for arg in options:
            if '--out-dir=' in arg:
                out_dir = arg.split('--out-dir=')[1] or './ebook'
                break
        else:
            out_dir = './ebook'

        force = '--force' in options
        enable_comments = '--enable-comments' in options
        # NOTE(review): these two flags are not read below; kept because the
        # method may continue past what is visible here -- confirm.
        source_only = '--source-only' in options
        push = '--push' in options

        for arg in options:
            if '--comment-count=' in arg:
                # Normalise to int so the value has the same type as the
                # default instead of being a str only when user-supplied.
                comment_count = int(arg.split('--comment-count=')[1] or 10)
                break
        else:
            comment_count = 10

        os.makedirs(out_dir, exist_ok=True)

        dc = DataClient()

        course_data = dc.get_course_intro(course_id)

        if int(course_data['column_type']) not in (1, 2):
            raise Exception('该课程不提供文本:%s' % course_data['column_title'])

        # data
        data = dc.get_course_content(course_id, force=force)

        if enable_comments:
            for post in data:
                post['article_content'] += self._render_comment_html(
                    post['comments'], comment_count)

        # source file
        course_data['column_title'] = maker.format_file_name(
            course_data['column_title'])
        self.render_column_source_files(course_data, data, out_dir, force=force)

示例#5

0

显示文件

文件： ebook.py 项目： L5411/geektime_dl

    def run(self, args):
        """Render a text course to source files and optionally mail the mobi.

        Args:
            args: Command-line style argument list. ``args[0]`` is the
                course id. The remaining items may contain
                ``--out-dir=<path>`` (default ``./ebook``), ``--force``
                (forwarded to the content fetch and the source renderer),
                ``--enable-comments`` (append rendered comments to each
                article), ``--comment-count=<n>`` (default 10),
                ``--source-only`` (skip the ebook step) and ``--push``
                (mail ``<title>.mobi`` from the output directory using the
                settings in ``smtp.conf``).

        Raises:
            Exception: If the course's ``column_type`` is not 1 or 2.
            ValueError: If ``--comment-count`` is not an integer.
        """
        course_id = args[0]
        options = args[1:]
        for arg in options:
            if '--out-dir=' in arg:
                out_dir = arg.split('--out-dir=')[1] or './ebook'
                break
        else:
            out_dir = './ebook'

        force = '--force' in options
        enable_comments = '--enable-comments' in options
        source_only = '--source-only' in options
        push = '--push' in options

        for arg in options:
            if '--comment-count=' in arg:
                # Normalise to int so the value has the same type as the
                # default instead of being a str only when user-supplied.
                comment_count = int(arg.split('--comment-count=')[1] or 10)
                break
        else:
            comment_count = 10

        os.makedirs(out_dir, exist_ok=True)

        dc = DataClient()
        course_data = dc.get_course_intro(course_id, force=True)

        if int(course_data['column_type']) not in (1, 2):
            raise Exception('该课程不提供文本:%s' % course_data['column_title'])

        # data
        data = dc.get_course_content(course_id, force=force)

        if enable_comments:
            for post in data:
                post['article_content'] += self._render_comment_html(
                    post['comments'], comment_count)

        # source file
        course_data['column_title'] = maker.format_file_name(
            course_data['column_title'])
        self.render_column_source_files(course_data,
                                        data,
                                        out_dir,
                                        force=force)

        # ebook
        if not source_only:
            if course_data['update_frequency'] == '全集' and os.path.isfile(
                    os.path.join(out_dir, self._title(course_data)) + '.mobi'):
                print("{} exists ".format(self._title(course_data)))
            else:
                print(
                    os.path.join(out_dir, course_data['column_title']) +
                    " -> " +
                    course_data['column_title'].replace("html", "pdf"))
                # make_mobi(source_dir=os.path.join(out_dir, course_data['column_title']), output_dir=out_dir)

        if not push:
            return

        mobi_name = "{}.mobi".format(self._title(course_data))
        fn = os.path.join(out_dir, mobi_name)
        # The mobi build above is disabled, so the file may not exist;
        # report that instead of failing inside os.path.getsize.
        if not os.path.isfile(fn):
            print("电子书文件不存在: {}".format(fn))
            return
        if os.path.getsize(fn) / 1024.0 / 1024 > 50:
            print("电子书大小超过50M")
            return
        with open(fn, 'rb') as f:
            d = f.read()

        with open('smtp.conf', encoding='utf-8') as f:
            smtp_conf = json.load(f)
        m = MailServer(host=smtp_conf['host'],
                       port=smtp_conf['port'],
                       user=smtp_conf['user'],
                       password=smtp_conf['password'],
                       encryption=smtp_conf['encryption'])
        message = m.build_email(email_to=smtp_conf['email_to'],
                                subject='convert',
                                body='',
                                attachments=[(mobi_name, d)])
        m.send_email(message)
        print("push to kindle done")

示例#6

0

显示文件

文件： test_local_storage.py 项目： liang-chang/puppeteer_chrome

def test_get_course_content(dc: DataClient):
    """Check that fetching the content of course 212 gives a truthy result."""
    content = dc.get_course_content(212)
    assert content