def run(self, args):
    """Download the videos of a video course, or only write their URLs.

    Args:
        args: Command arguments. ``args[0]`` is the course id. The
            remaining items may contain ``--url-only`` (write a text file
            with the hd/sd URLs instead of downloading), ``--hd-only``
            (prefer the hd stream) and ``--out-dir=<dir>`` (default
            ``./mp4``).

    Raises:
        Exception: If the course's ``column_type`` is not 3, i.e. it is
            not a video course.
    """
    course_id = args[0]
    options = args[1:]
    url_only = '--url-only' in options
    hd_only = '--hd-only' in options

    # First argument carrying --out-dir= wins; an empty value falls back
    # to the default directory.
    out_dir = './mp4'
    for arg in options:
        if '--out-dir=' in arg:
            out_dir = arg.split('--out-dir=', 1)[1] or './mp4'
            break
    os.makedirs(out_dir, exist_ok=True)

    dc = DataClient()
    course_data = dc.get_course_intro(course_id)
    if int(course_data['column_type']) != 3:
        raise Exception('该课程不是视频课程:%s' % course_data['column_title'])

    # Everything for this course goes into a sub-directory named after it.
    out_dir = os.path.join(out_dir, course_data['column_title'])
    os.makedirs(out_dir, exist_ok=True)

    data = dc.get_course_content(course_id)

    if url_only:
        entries = []
        for post in data:
            # Parse the media description once per post.
            media = json.loads(post['video_media'])
            entries.append("{}:\n{}\n{}\n\n".format(
                post['article_title'],
                (media.get('hd') or {}).get('url'),
                (media.get('sd') or {}).get('url')))
        url_file = os.path.join(
            out_dir, '%s.mp4.txt' % course_data['column_title'])
        # Titles are not ASCII; pin the encoding so the write does not
        # depend on the platform default.
        with open(url_file, 'w', encoding='utf-8') as f:
            f.write('\n'.join(entries))
        print("download mp4 url done: " + course_data['column_title'])
        return

    for post in data:
        file_name = format_path(
            post['article_title'] + ('.hd' if hd_only else '.sd'))
        # An existing "<file_name>.ts" file means this post is already done.
        if os.path.isfile(os.path.join(out_dir, file_name) + '.ts'):
            print(file_name + ' exists')
            continue

        media = json.loads(post['video_media'])
        sd_url = (media.get('sd') or {}).get('url')
        if hd_only:
            # some post has sd mp4 only
            url = (media.get('hd') or {}).get('url') or sd_url
        else:
            url = sd_url
        if not url:
            # Nothing to download for this post; skip it instead of
            # handing None to the downloader.
            print('no video url: ' + file_name)
            continue

        dl = Downloader(3)
        dl.run(url, dir=out_dir, file_name=file_name)
        print('download mp4 done: ' + file_name)
def run(self, args):
    """Render a text course to source files and, optionally, a mobi ebook.

    Args:
        args: Command arguments. ``args[0]`` is the course id. The
            remaining items may contain ``--out-dir=<dir>`` (default
            ``./ebook``), ``--force``, ``--enable-comments``,
            ``--source-only`` (skip the ebook step) and
            ``--comment-count=<n>`` (default 10).

    Raises:
        Exception: If the course's ``column_type`` is neither 1 nor 2,
            i.e. the course provides no text.
        ValueError: If the ``--comment-count=`` value is not an integer.
    """
    course_id = args[0]
    options = args[1:]
    force = '--force' in options
    enable_comments = '--enable-comments' in options
    source_only = '--source-only' in options

    # First matching argument wins; an empty value keeps the default.
    out_dir = './ebook'
    for arg in options:
        if '--out-dir=' in arg:
            out_dir = arg.split('--out-dir=', 1)[1] or './ebook'
            break

    # Always an int, whether it comes from the command line or the default.
    comment_count = 10
    for arg in options:
        if '--comment-count=' in arg:
            comment_count = int(arg.split('--comment-count=', 1)[1] or 10)
            break

    os.makedirs(out_dir, exist_ok=True)

    dc = DataClient()
    course_data = dc.get_course_intro(course_id)
    if int(course_data['column_type']) not in (1, 2):
        raise Exception('该课程不提供文本:%s' % course_data['column_title'])

    # data
    data = dc.get_course_content(course_id, force=force)
    if enable_comments:
        for post in data:
            # Append the rendered comments to the article body in place.
            post['article_content'] += self._render_comment_html(
                post['comments'], comment_count)

    # source file
    course_data['column_title'] = maker.format_file_name(
        course_data['column_title'])
    self.render_column_source_files(course_data, data, out_dir, force=force)

    # ebook
    if source_only:
        return
    # NOTE(review): presumably '全集' marks a course that is complete, so an
    # existing mobi is reused rather than rebuilt; confirm.
    if course_data['update_frequency'] == '全集' and os.path.isfile(
            os.path.join(out_dir, self._title(course_data)) + '.mobi'):
        print("{} exists ".format(self._title(course_data)))
    else:
        make_mobi(
            source_dir=os.path.join(out_dir, course_data['column_title']),
            output_dir=out_dir)
def run(self, args):
    """Download the audio of a course, or only write the audio URLs.

    Args:
        args: Command arguments. ``args[0]`` is the course id. The
            remaining items may contain ``--url-only`` (write a text file
            with the download URLs instead of downloading) and
            ``--out-dir=<dir>`` (default ``./mp3``).

    Raises:
        Exception: If the course's ``column_type`` is not 1, i.e. the
            course provides no audio.
    """
    course_id = args[0]
    options = args[1:]
    url_only = '--url-only' in options

    # First argument carrying --out-dir= wins; an empty value falls back
    # to the default directory.
    out_dir = './mp3'
    for arg in options:
        if '--out-dir=' in arg:
            out_dir = arg.split('--out-dir=', 1)[1] or './mp3'
            break
    os.makedirs(out_dir, exist_ok=True)

    dc = DataClient()
    course_data = dc.get_course_intro(course_id)
    if int(course_data['column_type']) != 1:
        raise Exception('该课程不提供音频:%s' % course_data['column_title'])

    # Everything for this course goes into a sub-directory named after it.
    out_dir = os.path.join(out_dir, course_data['column_title'])
    os.makedirs(out_dir, exist_ok=True)

    data = dc.get_course_content(course_id)

    if url_only:
        url_file = os.path.join(
            out_dir, '%s.mp3.txt' % course_data['column_title'])
        # Titles are not ASCII; pin the encoding so the write does not
        # depend on the platform default.
        with open(url_file, 'w', encoding='utf-8') as f:
            # TODO alignment
            f.write('\n'.join(
                "{}:\t\t{}".format(post['article_title'],
                                   post['audio_download_url'])
                for post in data))
        return

    dl = Downloader()
    for post in data:
        file_name = format_path(post['article_title'] + '.mp3')
        if os.path.isfile(os.path.join(out_dir, file_name)):
            print(file_name + ' exists')
            continue
        audio_url = post['audio_download_url']
        if not audio_url:
            # Posts without an audio URL are skipped.
            continue
        dl.run(audio_url, out_file=file_name, out_dir=out_dir)
        print('download mp3 done: ' + file_name)
def run(self, args):
    """Render the source files of a text course.

    Args:
        args: Command arguments. ``args[0]`` is the course id. The
            remaining items may contain ``--out-dir=<dir>`` (default
            ``./ebook``), ``--force``, ``--enable-comments`` and
            ``--comment-count=<n>`` (default 10). ``--source-only`` and
            ``--push`` have no effect in this implementation.

    Raises:
        Exception: If the course's ``column_type`` is neither 1 nor 2,
            i.e. the course provides no text.
        ValueError: If the ``--comment-count=`` value is not an integer.
    """
    course_id = args[0]
    options = args[1:]
    force = '--force' in options
    enable_comments = '--enable-comments' in options

    # First matching argument wins; an empty value keeps the default.
    out_dir = './ebook'
    for arg in options:
        if '--out-dir=' in arg:
            out_dir = arg.split('--out-dir=', 1)[1] or './ebook'
            break

    # Always an int, whether it comes from the command line or the default.
    comment_count = 10
    for arg in options:
        if '--comment-count=' in arg:
            comment_count = int(arg.split('--comment-count=', 1)[1] or 10)
            break

    os.makedirs(out_dir, exist_ok=True)

    dc = DataClient()
    course_data = dc.get_course_intro(course_id)
    if int(course_data['column_type']) not in (1, 2):
        raise Exception('该课程不提供文本:%s' % course_data['column_title'])

    # data
    data = dc.get_course_content(course_id, force=force)
    if enable_comments:
        for post in data:
            # Append the rendered comments to the article body in place.
            post['article_content'] += self._render_comment_html(
                post['comments'], comment_count)

    # source file
    course_data['column_title'] = maker.format_file_name(
        course_data['column_title'])
    self.render_column_source_files(course_data, data, out_dir, force=force)
def run(self, args):
    """Render a text course to source files and optionally mail its mobi.

    Args:
        args: Command arguments. ``args[0]`` is the course id. The
            remaining items may contain ``--out-dir=<dir>`` (default
            ``./ebook``), ``--force``, ``--enable-comments``,
            ``--source-only`` (stop after the source files), ``--push``
            (mail the mobi using the settings in ``smtp.conf``) and
            ``--comment-count=<n>`` (default 10).

    Raises:
        Exception: If the course's ``column_type`` is neither 1 nor 2,
            i.e. the course provides no text.
        ValueError: If the ``--comment-count=`` value is not an integer.
        OSError: If ``--push`` is given and the mobi file or ``smtp.conf``
            cannot be read.
    """
    course_id = args[0]
    options = args[1:]
    force = '--force' in options
    enable_comments = '--enable-comments' in options
    source_only = '--source-only' in options
    push = '--push' in options

    # First matching argument wins; an empty value keeps the default.
    out_dir = './ebook'
    for arg in options:
        if '--out-dir=' in arg:
            out_dir = arg.split('--out-dir=', 1)[1] or './ebook'
            break

    # Always an int, whether it comes from the command line or the default.
    comment_count = 10
    for arg in options:
        if '--comment-count=' in arg:
            comment_count = int(arg.split('--comment-count=', 1)[1] or 10)
            break

    os.makedirs(out_dir, exist_ok=True)

    dc = DataClient()
    course_data = dc.get_course_intro(course_id, force=True)
    if int(course_data['column_type']) not in (1, 2):
        raise Exception('该课程不提供文本:%s' % course_data['column_title'])

    # data
    data = dc.get_course_content(course_id, force=force)
    if enable_comments:
        for post in data:
            # Append the rendered comments to the article body in place.
            post['article_content'] += self._render_comment_html(
                post['comments'], comment_count)

    # source file
    course_data['column_title'] = maker.format_file_name(
        course_data['column_title'])
    self.render_column_source_files(course_data, data, out_dir, force=force)

    # ebook
    if source_only:
        return

    # NOTE(review): presumably '全集' marks a course that is complete, so an
    # existing mobi is reused rather than rebuilt; confirm.
    if course_data['update_frequency'] == '全集' and os.path.isfile(
            os.path.join(out_dir, self._title(course_data)) + '.mobi'):
        print("{} exists ".format(self._title(course_data)))
    else:
        print(
            os.path.join(out_dir, course_data['column_title']) + " -> " +
            course_data['column_title'].replace("html", "pdf"))
        # NOTE(review): the mobi build is disabled here, so --push only
        # works when the mobi file already exists in out_dir.
        # make_mobi(source_dir=os.path.join(out_dir, course_data['column_title']), output_dir=out_dir)

    if not push:
        return

    mobi_name = "{}.mobi".format(self._title(course_data))
    mobi_path = os.path.join(out_dir, mobi_name)
    # Attachments larger than 50 MiB are not sent.
    if os.path.getsize(mobi_path) / 1024.0 / 1024 > 50:
        print("电子书大小超过50M")
        return

    # Context managers close both files even if reading or parsing fails.
    with open(mobi_path, 'rb') as mobi_file:
        mobi_bytes = mobi_file.read()
    with open('smtp.conf', encoding='utf-8') as conf_file:
        smtp_conf = json.load(conf_file)

    m = MailServer(host=smtp_conf['host'],
                   port=smtp_conf['port'],
                   user=smtp_conf['user'],
                   password=smtp_conf['password'],
                   encryption=smtp_conf['encryption'])
    message = m.build_email(email_to=smtp_conf['email_to'],
                            subject='convert',
                            body='',
                            attachments=[(mobi_name, mobi_bytes)])
    m.send_email(message)
    print("push to kindle done")
def test_get_course_content(dc: DataClient):
    """Fetching the content of course 212 must return a truthy result."""
    # `dc` is presumably supplied by a test fixture defined elsewhere.
    content = dc.get_course_content(212)
    assert content