Exemplo n.º 1
0
def get_resource(term_id, token, file_types=None):
    """Collect the downloadable courseware of a term.

    Walks the chapter -> lesson -> unit tree returned by ``get_courseinfo``
    and, for every unit whose ``contentType`` is listed in *file_types*,
    builds the target path from ``CONFIG["file_path_template"]`` and calls
    ``touch_dir`` on its parent directory.

    Args:
        term_id: Term identifier, passed through to ``get_courseinfo``.
        token: Login token, passed through to ``get_courseinfo``.
        file_types: Content types to keep.  ``None`` (the default) keeps
            ``VIDEO``, ``PDF`` and ``RICH_TEXT``.

    Returns:
        A list of tuples.  Videos and PDFs yield
        ``(content_type, file_path, unit_id, content_id)``; rich-text units
        that carry a ``jsonContent`` attachment yield
        ``(RICH_TEXT, file_path, json_content)``.
    """
    # A None sentinel replaces the former mutable list default.
    if file_types is None:
        file_types = (VIDEO, PDF, RICH_TEXT)

    def build_path(courseware_num, chapter, lesson, type_name, unit_name):
        """Fill the configured path template for a single unit."""
        return CONFIG["file_path_template"].format(
            base_dir=base_dir,
            sep=os.path.sep,
            type=type_name,
            cnt_1=get_section_num(courseware_num, level=1),
            cnt_2=get_section_num(courseware_num, level=2),
            cnt_3=get_section_num(courseware_num, level=3),
            chapter_name=repair_filename(chapter["name"]),
            lesson_name=repair_filename(lesson["name"]),
            unit_name=unit_name,
        )

    resource_list = []

    course_info = get_courseinfo(term_id, token)
    chapters = course_info.get("results").get("termDto").get("chapters")
    for chapter_num, chapter in enumerate(chapters):
        # A missing or None "lessons"/"units" entry is treated as empty so a
        # single incomplete chapter does not abort the whole listing.
        for lesson_num, lesson in enumerate(chapter.get("lessons") or []):
            for unit_num, unit in enumerate(lesson.get("units") or []):
                content_type = unit["contentType"]
                if content_type not in file_types:
                    continue

                # 1-based (chapter, lesson, unit) numbering used in the path.
                courseware_num = (chapter_num + 1, lesson_num + 1,
                                  unit_num + 1)
                file_path = build_path(
                    courseware_num, chapter, lesson,
                    COURSEWARE.get(content_type, "Unknown"),
                    repair_filename(unit["name"]),
                )
                touch_dir(os.path.dirname(file_path))

                if content_type == VIDEO:
                    file_path += ".mp4"
                    # Only videos are registered in the playlist.
                    playlist.write_path(file_path)
                    resource_list.append(
                        (VIDEO, file_path, unit["id"], unit["contentId"]))
                elif content_type == PDF:
                    file_path += ".pdf"
                    resource_list.append(
                        (PDF, file_path, unit["id"], unit["contentId"]))
                elif content_type == RICH_TEXT and unit.get("jsonContent"):
                    # NOTE(security): eval() executes text received from the
                    # remote service.  Kept to preserve behaviour; consider
                    # json.loads()/ast.literal_eval() once the payload format
                    # has been confirmed.
                    json_content = eval(unit["jsonContent"])
                    # Sanitise only the stem so the attachment keeps its
                    # original extension.
                    stem, ext = os.path.splitext(json_content["fileName"])
                    file_path = build_path(
                        courseware_num, chapter, lesson,
                        "File",
                        repair_filename(stem) + ext,
                    )
                    touch_dir(os.path.dirname(file_path))

                    resource_list.append(
                        (RICH_TEXT, file_path, json_content))

    return resource_list
Exemplo n.º 2
0
def get_resource(term_id, token):
    """Build the list of courseware resources of a term.

    Iterates over every chapter, lesson and unit returned by
    ``get_courseinfo`` and derives a numbered target path below ``base_dir``
    for each unit.  ``touch_dir`` is called on the parent directory of every
    unit path, whatever the unit's content type.

    Returns a list of tuples: ``(VIDEO | PDF, file_path, unit_id,
    content_id)`` for videos and PDFs, and ``(RICH_TEXT, file_path,
    json_content)`` for rich-text units that have a ``jsonContent`` value.
    Units of any other content type are not listed.
    """

    def target_path(numbering, chapter, lesson, leaf_source, leaf_key):
        """Join base_dir with the numbered chapter / lesson / leaf names."""
        segments = (
            " ".join((get_section_num(numbering, level=1),
                      repair_filename(chapter["name"]))),
            " ".join((get_section_num(numbering, level=2),
                      repair_filename(lesson["name"]))),
            " ".join((get_section_num(numbering, level=3),
                      repair_filename(leaf_source[leaf_key]))),
        )
        return os.path.join(base_dir, *segments)

    collected = []

    info = get_courseinfo(term_id, token)
    chapters = info.get('results').get('termDto').get('chapters')
    for chapter_idx, chapter in enumerate(chapters, start=1):
        for lesson_idx, lesson in enumerate(chapter.get('lessons'), start=1):
            for unit_idx, unit in enumerate(lesson.get('units'), start=1):
                numbering = (chapter_idx, lesson_idx, unit_idx)
                file_path = target_path(numbering, chapter, lesson,
                                        unit, "name")
                touch_dir(os.path.dirname(file_path))

                kind = unit['contentType']
                if kind == VIDEO:
                    file_path = file_path + '.mp4'
                    # Videos are also registered in the playlist.
                    playlist.write_path(file_path)
                    collected.append(
                        (VIDEO, file_path, unit['id'], unit['contentId']))
                    continue

                if kind == PDF:
                    file_path = file_path + ".pdf"
                    collected.append(
                        (PDF, file_path, unit['id'], unit['contentId']))
                    continue

                if kind == RICH_TEXT and unit.get('jsonContent'):
                    # NOTE(review): eval() runs text supplied by the remote
                    # service; behaviour kept as-is.
                    json_content = eval(unit['jsonContent'])
                    # The attachment is stored under its own file name
                    # instead of the unit name.
                    file_path = target_path(numbering, chapter, lesson,
                                            json_content, "fileName")
                    collected.append((RICH_TEXT, file_path, json_content))

    return collected
Exemplo n.º 3
0
def parse(url, config):
    """Prepare everything needed to download the videos behind *url*.

    Merges *config* into the module-level ``CONFIG``, creates the output
    directories, sets up the optional playlist, fetches the video list,
    keeps only the selected episodes, parses the segment info of each
    remaining video and finally publishes the result through ``exports``.

    Args:
        url: Page URL handed to ``get_title`` and ``get_videos``.
        config: Mapping merged into ``CONFIG``; must contain ``"cookies"``.
    """
    # Apply the caller's settings and fetch the title.
    CONFIG.update(config)
    spider.set_cookies(config["cookies"])
    title = get_title(url)
    print(title)

    # Create the directory layout.
    # NOTE(review): repair_filename is applied to the whole joined path, not
    # only to the title -- confirm it leaves path separators intact.
    root_path = os.path.join(CONFIG['dir'], title + " - bilibili")
    CONFIG["base_dir"] = touch_dir(repair_filename(root_path))
    CONFIG["video_dir"] = touch_dir(os.path.join(CONFIG['base_dir'], "Videos"))

    # Pick the playlist writer; any other type disables the playlist.
    playlist_type = CONFIG["playlist_type"]
    if playlist_type == "dpl":
        playlist = Dpl(
            os.path.join(CONFIG['base_dir'], 'Playlist.dpl'),
            path_type=CONFIG["playlist_path_type"])
    elif playlist_type == "m3u":
        playlist = M3u(
            os.path.join(CONFIG['base_dir'], 'Playlist.m3u'),
            path_type=CONFIG["playlist_path_type"])
    else:
        playlist = None
    CONFIG['playlist'] = playlist

    # Fetch the full video list, then flush the playlist if there is one.
    videos = get_videos(url)
    CONFIG["videos"] = videos
    current_playlist = CONFIG['playlist']
    if current_playlist is not None:
        current_playlist.flush()

    # Keep only the requested episodes.
    episodes = parse_episodes(CONFIG["episodes"], len(videos))
    videos = [video for video in videos if video.id in episodes]
    CONFIG["videos"] = videos

    # Parse segment info and video URLs, reporting progress on one line.
    for index, video in enumerate(videos):
        progress = "{:02}/{:02} parsing segments info...".format(
            index, len(videos))
        print(progress, end="\r")
        parse_segment_info(video)

    # Export the data required by the download step.
    exported = {
        "videos": videos,
        "video_dir": CONFIG["video_dir"]
    }
    exports.update(exported)
Exemplo n.º 4
0
def convert_danmaku(video_path_list):
    """Convert the XML danmaku next to each video into an ASS file.

    The conversion is delegated to the ``danmaku2ass`` plugin, which is
    downloaded into ``plugins/`` on first use.  For every path in
    *video_path_list* the extension is stripped and ``<name>.xml`` is
    converted to ``<name>.ass``, using the frame size of ``<name>.mp4`` as
    the stage size.  Entries lacking either the ``.mp4`` or the ``.xml``
    file are skipped, as are videos from which no frame can be read.

    Args:
        video_path_list: Iterable of video file paths.

    Raises:
        requests.HTTPError: If the plugin download returns an HTTP error
            status.
    """
    # Make sure the plugin is available.
    plugin_url = "https://raw.githubusercontent.com/m13253/danmaku2ass/master/danmaku2ass.py"
    plugin_path = "plugins/danmaku2ass.py"
    plugin_dir = os.path.dirname(plugin_path)
    touch_dir(plugin_dir)
    touch_file(os.path.join(plugin_dir, "__init__.py"))
    if not os.path.exists(plugin_path):
        print("下载插件中……")
        res = requests.get(plugin_url)
        # Fail before writing: an error page saved as the plugin would never
        # be re-downloaded because the file would then exist.
        res.raise_for_status()
        with open(plugin_path, "w", encoding="utf8") as f:
            f.write(res.text)

    # Run the conversion with the plugin.
    from plugins.danmaku2ass import Danmaku2ASS
    for video_path in video_path_list:
        name = os.path.splitext(video_path)[0]
        print("convert {} ".format(os.path.split(name)[-1]), end="\r")
        if not os.path.exists(name + ".mp4") or \
                not os.path.exists(name + ".xml"):
            continue

        # Read one frame to learn the video resolution, and always release
        # the capture handle afterwards.
        cap = cv2.VideoCapture(name + ".mp4")
        try:
            ok, frame = cap.read()
        finally:
            cap.release()
        if not ok or frame is None:
            # Unreadable video: skip it rather than crash on frame.shape.
            print("[warn] cannot read a frame from {}, danmaku not converted"
                  .format(name + ".mp4"))
            continue
        h, w = frame.shape[:2]

        Danmaku2ASS(name + ".xml",
                    "autodetect",
                    name + ".ass",
                    w,
                    h,
                    reserve_blank=0,
                    font_face=_('(FONT) sans-serif')[7:],
                    font_size=w / 40,
                    text_opacity=0.8,
                    duration_marquee=15.0,
                    duration_still=10.0,
                    comment_filter=None,
                    is_reduce_comments=False,
                    progress_callback=None)
Exemplo n.º 5
0
def get_resource(term_id, token, file_types=None):
    """Collect the downloadable courseware of a term.

    Walks the chapter -> lesson -> unit tree returned by ``get_courseinfo``
    and, for every unit whose ``contentType`` is listed in *file_types*,
    builds the target path from ``CONFIG['file_path_template']`` and calls
    ``touch_dir`` on its parent directory.

    Args:
        term_id: Term identifier, passed through to ``get_courseinfo``.
        token: Login token, passed through to ``get_courseinfo``.
        file_types: Content types to keep.  ``None`` (the default) keeps
            ``VIDEO``, ``PDF`` and ``RICH_TEXT``.

    Returns:
        A list of tuples.  Videos and PDFs yield
        ``(content_type, file_path, unit_id, content_id)``; rich-text units
        that carry a ``jsonContent`` attachment yield
        ``(RICH_TEXT, file_path, json_content)``.
    """
    # A None sentinel replaces the former mutable list default.
    if file_types is None:
        file_types = (VIDEO, PDF, RICH_TEXT)

    def build_path(courseware_num, chapter, lesson, type_name, unit_name):
        """Fill the configured path template for a single unit."""
        return CONFIG['file_path_template'].format(
            base_dir=base_dir,
            sep=os.path.sep,
            type=type_name,
            cnt_1=get_section_num(courseware_num, level=1),
            cnt_2=get_section_num(courseware_num, level=2),
            cnt_3=get_section_num(courseware_num, level=3),
            chapter_name=repair_filename(chapter["name"]),
            lesson_name=repair_filename(lesson["name"]),
            unit_name=unit_name,
        )

    resource_list = []

    course_info = get_courseinfo(term_id, token)
    chapters = course_info.get('results').get('termDto').get('chapters')
    for chapter_num, chapter in enumerate(chapters):
        # A missing or None 'lessons'/'units' entry is treated as empty so a
        # single incomplete chapter does not abort the whole listing.
        for lesson_num, lesson in enumerate(chapter.get('lessons') or []):
            for unit_num, unit in enumerate(lesson.get('units') or []):
                content_type = unit['contentType']
                if content_type not in file_types:
                    continue

                # 1-based (chapter, lesson, unit) numbering used in the path.
                courseware_num = (chapter_num + 1, lesson_num + 1, unit_num + 1)
                file_path = build_path(
                    courseware_num, chapter, lesson,
                    COURSEWARE.get(content_type, 'Unknown'),
                    repair_filename(unit["name"]),
                )
                touch_dir(os.path.dirname(file_path))

                if content_type == VIDEO:
                    file_path += '.mp4'
                    # Only videos are registered in the playlist.
                    playlist.write_path(file_path)
                    resource_list.append((
                        VIDEO,
                        file_path,
                        unit['id'],
                        unit['contentId'],
                    ))
                elif content_type == PDF:
                    file_path += ".pdf"
                    resource_list.append((
                        PDF,
                        file_path,
                        unit['id'],
                        unit['contentId'],
                    ))
                elif content_type == RICH_TEXT and unit.get('jsonContent'):
                    # NOTE(security): eval() executes text received from the
                    # remote service.  Kept to preserve behaviour; consider
                    # json.loads()/ast.literal_eval() once the payload format
                    # has been confirmed.
                    json_content = eval(unit['jsonContent'])
                    # Sanitise only the stem so the attachment keeps its
                    # original extension.
                    stem, ext = os.path.splitext(json_content["fileName"])
                    file_path = build_path(
                        courseware_num, chapter, lesson,
                        'File',
                        repair_filename(stem) + ext,
                    )
                    touch_dir(os.path.dirname(file_path))

                    resource_list.append((
                        RICH_TEXT,
                        file_path,
                        json_content,
                    ))

    return resource_list
Exemplo n.º 6
0

if __name__ == "__main__":
    root = CONFIG["root"]
    num_thread = CONFIG["num_thread"]
    url = sys.argv[1]

    # 登录并获取信息
    token = login(CONFIG["username"], CONFIG["password"])
    term_id, course_name = get_summary(url)
    course_id = re.match(r"https?://www.icourse163.org/(course|learn)/\w+-(\d+)", url).group(2)
    print(course_name)
    print(course_id)

    # 创建必要环境
    base_dir = touch_dir(os.path.join(root, course_name))
    playlist = Dpl(os.path.join(base_dir, 'Playlist.dpl'))

    # 获取资源列表
    resource_list = get_resource(term_id, token, file_types=CONFIG['file_types'])

    # 解析资源
    resources = []
    merge_list = []
    for i, resource in enumerate(resource_list):
        print("parse_resource {}/{}".format(i, len(resource_list)), end="\r")
        url, file_path, params = parse_resource(resource, token)
        # 过滤掉已经下载的资源
        if os.path.exists(file_path) and not CONFIG['overwrite']:
            print('[info] {} already exists!'.format(file_path))
            continue