Пример #1
0
def addin_dubbed_video_mappings(node_data, lang=en_lang_code):
    """Extend ``node_data`` with English nodes that have a dubbed video for ``lang``.

    Reads ``build/dubbed_video_mappings.json`` and ``build/en_nodes.json``
    relative to the current working directory. When the mappings file does
    not exist, ``main()`` is called first (presumably it generates the file).

    For every video node in ``en_nodes.json`` whose ``youtube_id`` is not
    already present in ``node_data`` and has an entry in the mapping for
    ``lang``, the node's ``youtube_id`` is replaced by the dubbed id, its
    ``translated_youtube_lang`` is set to ``lang`` and the node is appended.
    Topic nodes whose ``path`` is not already present are appended unchanged.

    Args:
        node_data: Sequence of node dicts (assumed to be a list); it is
            extended in place with ``+=``.
        lang: Language code passed to ``get_lang_name`` to find the key of
            the mapping to use.

    Returns:
        ``node_data`` itself, unchanged when the mapping has no (non-empty)
        entry for the language.
    """
    build_path = os.path.join(os.getcwd(), "build")
    dubbed_videos_path = os.path.join(build_path, "dubbed_video_mappings.json")

    # Create dubbed_video_mappings.json in the build folder if it is missing.
    if os.path.exists(dubbed_videos_path):
        logging.info("Dubbed videos json already exist at %s", DUBBED_VIDEOS_MAPPING_FILEPATH)
    else:
        main()

    # The mapping is keyed by the lower-cased language name.
    lang_code = get_lang_name(lang).lower()
    with open(dubbed_videos_path, "r") as f:
        dubbed_videos_load = ujson.load(f)

    dubbed_videos = dubbed_videos_load.get(lang_code)
    # A missing or empty entry means there is nothing to add for this language.
    if not dubbed_videos:
        return node_data

    # Sets give O(1) membership tests; the lists used previously made the
    # loops below quadratic in the number of nodes.
    seen_youtube_ids = set()
    seen_topic_paths = set()
    for node in node_data:
        node_kind = node.get("kind")
        if node_kind == NodeType.video:
            seen_youtube_ids.add(node.get("youtube_id"))
        elif node_kind == NodeType.topic:
            seen_topic_paths.add(node.get("path"))

    en_nodes_path = os.path.join(build_path, "en_nodes.json")
    with open(en_nodes_path, "r") as f:
        en_node_load = ujson.load(f)

    en_node_list = []
    # en_nodes.json must have the same data structure as node_data.
    for node in en_node_load:
        node_kind = node.get("kind")

        if node_kind == NodeType.video:
            youtube_id = node["youtube_id"]
            if youtube_id in seen_youtube_ids or youtube_id not in dubbed_videos:
                continue
            node["youtube_id"] = dubbed_videos[youtube_id]
            node["translated_youtube_lang"] = lang
            en_node_list.append(node)
            # Track the original (English) id so duplicates are skipped.
            seen_youtube_ids.add(youtube_id)
        elif node_kind == NodeType.topic:
            # Append every topic that is not already known by its path.
            topic_path = node["path"]
            if topic_path not in seen_topic_paths:
                en_node_list.append(node)
                seen_topic_paths.add(topic_path)

    node_data += en_node_list
    return node_data
Пример #2
0
def add_dubbed_video_mappings(node_data, lang=EN_LANG_CODE):
    """Return ``node_data`` with dubbed-video nodes for ``lang`` substituted in.

    Reads ``build/dubbed_video_mappings.json`` relative to the current
    working directory, calling ``main()`` first when the file does not exist
    (presumably it generates the file). The mapping for the language is
    looked up by, in order, the lower-cased results of ``get_lang_ka_name``,
    ``get_lang_name`` and ``get_lang_native_name``; the first non-empty entry
    wins.

    English nodes are then read from ``build/en_nodes.json`` after calling
    ``download_and_clean_kalite_data`` (presumably that call produces or
    refreshes the file). Topics with an unseen ``path`` are collected as-is.
    Videos whose ``youtube_id`` is unseen and has a mapping entry are
    collected with the dubbed ``youtube_id`` and ``translated_youtube_lang``
    set to ``lang``.

    Args:
        node_data: Iterable of node dicts. Video nodes must carry a
            ``translated_youtube_lang`` key (``KeyError`` otherwise).
        lang: Language code used to find the mapping.

    Returns:
        ``node_data`` itself when no mapping exists for the language.
        Otherwise a new list: the nodes of ``node_data`` whose ``youtube_id``
        is not a key of the mapping, followed by the collected nodes.
    """
    # Create dubbed_video_mappings.json in the build folder if it is missing.
    build_path = os.path.join(os.getcwd(), "build")
    dubbed_videos_path = os.path.join(build_path, "dubbed_video_mappings.json")
    if os.path.exists(dubbed_videos_path):
        logging.info('Dubbed videos json already exist at %s',
                     DUBBED_VIDEOS_MAPPING_FILEPATH)
    else:
        main()

    with open(dubbed_videos_path, 'r') as f:
        dubbed_videos_load = ujson.load(f)

    # Dubbed video mappings may use the ka_name, lang_name or native_name as
    # the key of a language's dictionary of videos; try them in that order.
    dubbed_videos = None
    for get_name in (get_lang_ka_name, get_lang_name, get_lang_native_name):
        dubbed_videos = dubbed_videos_load.get(get_name(lang).lower())
        if dubbed_videos:
            break

    # No (non-empty) entry under any name: nothing to substitute.
    if not dubbed_videos:
        return node_data

    # Sets give O(1) membership tests; the lists used previously made the
    # loops below quadratic in the number of nodes.
    seen_youtube_ids = set()
    seen_topic_paths = set()
    for node in node_data:
        node_kind = node.get("kind")
        if node_kind == NodeType.video:
            if node["translated_youtube_lang"] == lang:
                seen_youtube_ids.add(node.get("youtube_id"))
        elif node_kind == NodeType.topic:
            seen_topic_paths.add(node.get("path"))

    # Generate and cache `en_nodes.json` for dubbed video mappings.
    url = API_URL.format(projection=json.dumps(PROJECTION_KEYS),
                         lang=EN_LANG_CODE,
                         ka_domain=KA_DOMAIN)
    download_and_clean_kalite_data(url,
                                   lang=EN_LANG_CODE,
                                   ignorecache=False,
                                   filename="en_nodes.json")
    en_nodes_path = os.path.join(build_path, "en_nodes.json")
    with open(en_nodes_path, 'r') as f:
        en_node_load = ujson.load(f)

    translated_node_list = []
    # en_nodes.json must have the same data structure as node_data.
    for node in en_node_load:
        node_kind = node.get("kind")

        if node_kind == NodeType.topic:
            # Append every topic that is not already known by its path.
            topic_path = node["path"]
            if topic_path not in seen_topic_paths:
                translated_node_list.append(node)
                seen_topic_paths.add(topic_path)
        elif node_kind == NodeType.video:
            youtube_id = node["youtube_id"]
            if youtube_id in seen_youtube_ids or youtube_id not in dubbed_videos:
                continue
            node["youtube_id"] = dubbed_videos[youtube_id]
            node["translated_youtube_lang"] = lang
            translated_node_list.append(node)
            # Track the original (English) id so duplicates are skipped.
            seen_youtube_ids.add(youtube_id)

    # Remove all nodes whose youtube_id has a dubbed video associated with it;
    # nodes without a youtube_id (e.g. topics) are kept.
    # NOTE(review): a node already translated to `lang` whose youtube_id is
    # also a key of the mapping is dropped here without being re-added above
    # -- confirm this cannot happen with real data.
    node_data = [
        node for node in node_data
        if node.get('youtube_id') not in dubbed_videos
    ]
    node_data += translated_node_list
    return node_data
def add_dubbed_video_mappings(node_data, lang=EN_LANG_CODE):
    """Extend ``node_data`` with English nodes that have a dubbed video for ``lang``.

    Reads ``build/dubbed_video_mappings.json`` relative to the current
    working directory, calling ``main()`` first when the file does not exist
    (presumably it generates the file). The mapping for the language is
    looked up by, in order, the lower-cased results of ``get_lang_ka_name``,
    ``get_lang_name`` and ``get_lang_native_name``; the first non-empty entry
    wins.

    English nodes are then read from ``build/en_nodes.json`` after calling
    ``download_and_clean_kalite_data`` (presumably that call produces or
    refreshes the file). Topics with an unseen ``path`` are appended as-is.
    Videos whose ``youtube_id`` is unseen and has a mapping entry are
    appended with the dubbed ``youtube_id`` and ``translated_youtube_lang``
    set to ``lang``. Existing nodes are never removed.

    Args:
        node_data: Sequence of node dicts (assumed to be a list); it is
            extended in place with ``+=``. Video nodes must carry a
            ``translated_youtube_lang`` key (``KeyError`` otherwise).
        lang: Language code used to find the mapping.

    Returns:
        ``node_data`` itself, unchanged when no mapping exists for the
        language.
    """
    # Create dubbed_video_mappings.json in the build folder if it is missing.
    build_path = os.path.join(os.getcwd(), "build")
    dubbed_videos_path = os.path.join(build_path, "dubbed_video_mappings.json")
    if os.path.exists(dubbed_videos_path):
        logging.info("Dubbed videos json already exist at %s", DUBBED_VIDEOS_MAPPING_FILEPATH)
    else:
        main()

    with open(dubbed_videos_path, "r") as f:
        dubbed_videos_load = ujson.load(f)

    # Dubbed video mappings may use the ka_name, lang_name or native_name as
    # the key of a language's dictionary of videos; try them in that order.
    dubbed_videos = None
    for get_name in (get_lang_ka_name, get_lang_name, get_lang_native_name):
        dubbed_videos = dubbed_videos_load.get(get_name(lang).lower())
        if dubbed_videos:
            break

    # No (non-empty) entry under any name: nothing to add.
    if not dubbed_videos:
        return node_data

    # Sets give O(1) membership tests; the lists used previously made the
    # loops below quadratic in the number of nodes.
    seen_youtube_ids = set()
    seen_topic_paths = set()
    for node in node_data:
        node_kind = node.get("kind")
        if node_kind == NodeType.video:
            if node["translated_youtube_lang"] == lang:
                seen_youtube_ids.add(node.get("youtube_id"))
        elif node_kind == NodeType.topic:
            seen_topic_paths.add(node.get("path"))

    # Generate and cache `en_nodes.json` for dubbed video mappings.
    url = API_URL.format(projection=json.dumps(PROJECTION_KEYS), lang=EN_LANG_CODE, ka_domain=KA_DOMAIN)
    download_and_clean_kalite_data(url, lang=EN_LANG_CODE, ignorecache=False, filename="en_nodes.json")
    en_nodes_path = os.path.join(build_path, "en_nodes.json")
    with open(en_nodes_path, "r") as f:
        en_node_load = ujson.load(f)

    translated_node_list = []
    # en_nodes.json must have the same data structure as node_data.
    for node in en_node_load:
        node_kind = node.get("kind")

        if node_kind == NodeType.topic:
            # Append every topic that is not already known by its path.
            topic_path = node["path"]
            if topic_path not in seen_topic_paths:
                translated_node_list.append(node)
                seen_topic_paths.add(topic_path)
        elif node_kind == NodeType.video:
            youtube_id = node["youtube_id"]
            if youtube_id in seen_youtube_ids or youtube_id not in dubbed_videos:
                continue
            node["youtube_id"] = dubbed_videos[youtube_id]
            node["translated_youtube_lang"] = lang
            translated_node_list.append(node)
            # Track the original (English) id so duplicates are skipped.
            seen_youtube_ids.add(youtube_id)

    node_data += translated_node_list
    return node_data