Example No. 1
    def handle(self, *args, **options):
        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        lang_code = lcode_to_ietf(options["lang_code"])
        if lang_code not in AVAILABLE_EXERCISE_LANGUAGE_CODES:
            logging.info("No exercises available for language %s" % lang_code)

        else:
            # Get list of exercises
            exercise_ids = options["exercise_ids"].split(",") if options["exercise_ids"] else None
            exercise_ids = exercise_ids or ([ex["id"] for ex in get_topic_exercises(topic_id=options["topic_id"])] if options["topic_id"] else None)
            exercise_ids = exercise_ids or get_node_cache("Exercise").keys()

            # Download the exercises
            for exercise_id in exercise_ids:
                scrape_exercise(exercise_id=exercise_id,
                                lang_code=lang_code,
                                force=options["force"])

        logging.info("Process complete.")
Example No. 2
def get_all_prepped_lang_codes():
    """Pre-prepped language codes, for downloading srts"""
    lang_codes = []
    for filename in get_all_download_status_files():
        lang_code = os.path.basename(filename).split("_")[0]
        lang_codes.append(lcode_to_ietf(lang_code))
    return lang_codes
Example No. 3
    def handle(self, *args, **options):
        if not settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the central server.")

        # None represents all
        lang_codes = [lcode_to_ietf(options["lang_code"])] if options["lang_code"] else None
        del options["lang_code"]

        if len(args) > 1:
            raise CommandError("Max 1 arg")

        elif len(args) == 1:
            if args[0] == "clear":
                logging.info("Clearing subtitles...")
                clear_subtitles_cache(lang_codes)

            else:
                raise CommandError("Unknown argument: %s" % args[0])

        else:
            validate_language_map(lang_codes)

            logging.info("Downloading...")
            download_srt_from_3rd_party(lang_codes=lang_codes, **options)

            validate_language_map(lang_codes)  # again at the end, so output is visible

            # for compatibility with KA Lite versions less than 0.10.3
            for lang in (lang_codes or get_langs_with_subtitles()):
                generate_srt_availability_file(lang)

        logging.info("Process complete.")
Example No. 4
def get_all_prepped_lang_codes():
    """Pre-prepped language codes, for downloading srts"""
    lang_codes = []
    for filename in get_all_download_status_files():
        lang_code = os.path.basename(filename).split("_")[0]
        lang_codes.append(lcode_to_ietf(lang_code))
    return lang_codes
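For reference, a minimal sketch of the filename parsing that get_all_prepped_lang_codes relies on. The status-file naming pattern and the lcode_to_ietf stand-in are assumptions for illustration only.

import os

def lcode_to_ietf_stub(code):
    # Hypothetical stand-in for the project's lcode_to_ietf ("pt_BR" -> "pt-BR").
    parts = code.replace("-", "_").split("_")
    return parts[0].lower() + ("-" + parts[1].upper() if len(parts) > 1 else "")

# Assumed naming: "<lang_code>_download_status.json"
filenames = ["/locale/status/pt_download_status.json",
             "/locale/status/es_download_status.json"]
lang_codes = [lcode_to_ietf_stub(os.path.basename(f).split("_")[0]) for f in filenames]
print(lang_codes)  # ['pt', 'es'] -- split("_")[0] keeps only the text before the first underscore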
Example No. 5
def move_srts(lang_code):
    """
    Srts live in the locale directory, but that's not exposed at any URL.  So instead,
    we have to move the srts out to /static/subtitles/[lang_code]/
    """
    lang_code_ietf = lcode_to_ietf(lang_code)
    lang_code_django = lcode_to_django_dir(lang_code)

    subtitles_static_dir = os.path.join(settings.STATIC_ROOT, "subtitles")
    src_dir = os.path.join(LOCALE_ROOT, lang_code_django, "subtitles")
    dest_dir = get_srt_path(lang_code_django)
    ensure_dir(dest_dir)

    lang_subtitles = glob.glob(os.path.join(src_dir, "*.srt"))
    logging.info("Moving %d subtitles from %s to %s" % (len(lang_subtitles), src_dir, dest_dir))

    for fil in lang_subtitles:
        srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
        if os.path.exists(srt_dest_path):
            os.remove(srt_dest_path)  # we're going to replace any srt with a newer version
        shutil.move(fil, srt_dest_path)

    if not os.path.exists(src_dir):
        logging.info("No subtitles for language pack %s" % lang_code)
    elif os.listdir(src_dir):
        logging.warn("%s is not empty; will not remove.  Please check that all subtitles were moved." % src_dir)
    else:
        logging.info("Removing empty source directory (%s)." % src_dir)
        shutil.rmtree(src_dir)
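The loop above deletes any stale copy before moving, so a freshly built .srt always replaces whatever was already at the destination. A self-contained sketch of that remove-then-move pattern, using throwaway temp directories rather than the project's paths:

import glob
import os
import shutil
import tempfile

src_dir = tempfile.mkdtemp()
dest_dir = tempfile.mkdtemp()
open(os.path.join(src_dir, "abc123.srt"), "w").close()   # freshly generated srt
open(os.path.join(dest_dir, "abc123.srt"), "w").close()  # stale copy at the destination

for fil in glob.glob(os.path.join(src_dir, "*.srt")):
    srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
    if os.path.exists(srt_dest_path):
        os.remove(srt_dest_path)     # drop the stale copy first
    shutil.move(fil, srt_dest_path)  # then move the new srt into place

print(os.listdir(src_dir))   # []
print(os.listdir(dest_dir))  # ['abc123.srt']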
Example No. 6
    def handle(self, *args, **options):
        if not settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the central server.")

        # None represents all
        lang_codes = [lcode_to_ietf(options["lang_code"])] if options["lang_code"] else None
        del options["lang_code"]

        if len(args) > 1:
            raise CommandError("Max 1 arg")

        elif len(args) == 1:
            if args[0] == "clear":
                logging.info("Clearing subtitles...")
                clear_subtitles_cache(lang_codes)

            else:
                raise CommandError("Unknown argument: %s" % args[0])

        else:
            validate_language_map(lang_codes)

            logging.info("Downloading...")
            download_srt_from_3rd_party(lang_codes=lang_codes, **options)

            validate_language_map(lang_codes)  # again at the end, so output is visible

            # for compatibility with KA Lite versions less than 0.10.3
            for lang in (lang_codes or get_langs_with_subtitles()):
                generate_srt_availability_file(lang)

        logging.info("Process complete.")
Example No. 7
def move_srts(lang_code):
    """
    Srts live in the locale directory, but that's not exposed at any URL.  So instead,
    we have to move the srts out to /static/subtitles/[lang_code]/
    """
    lang_code_ietf = lcode_to_ietf(lang_code)
    lang_code_django = lcode_to_django_dir(lang_code)

    subtitles_static_dir = os.path.join(settings.STATIC_ROOT, "subtitles")
    src_dir = os.path.join(LOCALE_ROOT, lang_code_django, "subtitles")
    dest_dir = get_srt_path(lang_code_django)
    ensure_dir(dest_dir)

    lang_subtitles = glob.glob(os.path.join(src_dir, "*.srt"))
    logging.info("Moving %d subtitles from %s to %s" %
                 (len(lang_subtitles), src_dir, dest_dir))

    for fil in lang_subtitles:
        srt_dest_path = os.path.join(dest_dir, os.path.basename(fil))
        if os.path.exists(srt_dest_path):
            os.remove(srt_dest_path)  # we're going to replace any srt with a newer version
        shutil.move(fil, srt_dest_path)

    if not os.path.exists(src_dir):
        logging.info("No subtitles for language pack %s" % lang_code)
    elif os.listdir(src_dir):
        logging.warn(
            "%s is not empty; will not remove.  Please check that all subtitles were moved."
            % src_dir)
    else:
        logging.info("Removing empty source directory (%s)." % src_dir)
        shutil.rmtree(src_dir)
Example No. 8
def get_language_pack(lang_code, software_version, callback):
    """Download language pack for specified language"""

    lang_code = lcode_to_ietf(lang_code)
    logging.info("Retrieving language pack: %s" % lang_code)
    request_url = get_language_pack_url(lang_code, software_version)
    path, response = download_file(request_url, callback=callback_percent_proxy(callback))
    return path
Example No. 9
def get_language_pack(lang_code, software_version, callback):
    """Download language pack for specified language"""

    lang_code = lcode_to_ietf(lang_code)
    logging.info("Retrieving language pack: %s" % lang_code)
    request_url = get_language_pack_url(lang_code, software_version)
    path, response = download_file(request_url,
                                   callback=callback_percent_proxy(callback))
    return path
Example No. 10
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on distributed servers.")

        lang_code = lcode_to_ietf(options["lang_code"])
        software_version = options["software_version"]
        logging.info(
            "Downloading language pack for lang_code=%s, software_version=%s" %
            (lang_code, software_version))

        # Download the language pack
        try:
            if options['file']:
                self.start(
                    _("Using local language pack '%(filepath)s'") %
                    {"filepath": options['file']})
                zip_filepath = options['file']
            else:
                self.start(
                    _("Downloading language pack '%(lang_code)s'") %
                    {"lang_code": lang_code})
                zip_filepath = get_language_pack(lang_code,
                                                 software_version,
                                                 callback=self.cb)

            # Unpack into locale directory
            self.next_stage(
                _("Unpacking language pack '%(lang_code)s'") %
                {"lang_code": lang_code})
            unpack_language(lang_code, zip_filepath=zip_filepath)

            #
            self.next_stage(
                _("Creating static files for language pack '%(lang_code)s'") %
                {"lang_code": lang_code})
            update_jsi18n_file(lang_code)

            self.next_stage(
                _("Moving files to their appropriate local disk locations."))
            move_dubbed_video_map(lang_code)
            move_exercises(lang_code)
            move_srts(lang_code)
            move_video_sizes_file(lang_code)

            self.next_stage(_("Invalidate caches"))
            caching.invalidate_all_caches()

            self.complete(
                _("Finished processing language pack %(lang_code)s") %
                {"lang_code": lang_code})
        except Exception as e:
            self.cancel(stage_status="error",
                        notes=_("Error: %(error_msg)s") %
                        {"error_msg": unicode(e)})
            raise
Example No. 11
def update_videos(request, max_to_show=4):
    installed_languages = get_installed_language_packs(force=True).copy() # we copy to avoid changing the original installed language list
    default_language_name = lang_best_name(installed_languages.pop(lcode_to_ietf(request.session["default_language"])))
    languages_to_show = [lang_best_name(l) for l in installed_languages.values()[:max_to_show]]
    other_languages_count = max(0, len(installed_languages) - max_to_show)

    context = update_context(request)
    context.update({
        "video_count": VideoFile.objects.filter(percent_complete=100).count(),
        "languages": languages_to_show,
        "default_language_name": default_language_name,
        "other_languages_count": other_languages_count,
    })
    return context
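Worth noting in update_videos: the default language is popped out of a copied dict first, so it can never appear again in the list of other languages. A sketch of that pop-then-slice idea with dummy names (dict ordering assumes a modern Python):

installed = {"en": "English", "es": "Spanish", "pt-BR": "Portuguese (Brazil)"}
default_code = "es"
max_to_show = 2

languages = dict(installed)                      # copy so the original mapping is untouched
default_name = languages.pop(default_code)       # remove the default from the pool
others = list(languages.values())[:max_to_show]  # show at most max_to_show of the rest

print(default_name)  # Spanish
print(others)        # ['English', 'Portuguese (Brazil)']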
Example No. 12
def clear_subtitles_cache(lang_codes=None, locale_root=LOCALE_ROOT):
    """
    Language codes will be converted to django format (e.g. en_US)
    """
    lang_codes = lang_codes or get_langs_with_subtitles()
    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)

        # Clear the status file
        lm_file = get_lang_map_filepath(lang_code)
        download_status = softload_json(lm_file, raises=True)
        for key in download_status:
            download_status[key] = {u'downloaded': False, u'last_success': u'', u'last_attempt': u'', u'api_response': u''}
        with open(lm_file, "w") as fp:
            json.dump(download_status, fp)

        # Delete all srt files
        srt_path = get_srt_path(lang_code)
        if os.path.exists(srt_path):
            shutil.rmtree(srt_path)
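For illustration, a minimal sketch of what "clearing" a status file amounts to in the loop above: every entry is replaced with the same blank record and the file is rewritten as JSON. The sample entries and the output path below are made up.

import json

download_status = {"abc123": {"downloaded": True, "last_success": "2013-01-01"},
                   "def456": {"downloaded": False}}

blank = {u"downloaded": False, u"last_success": u"", u"last_attempt": u"", u"api_response": u""}
for key in download_status:
    download_status[key] = dict(blank)  # fresh copy per entry so they stay independent

with open("/tmp/pt_srt_download_status.json", "w") as fp:  # hypothetical path
    json.dump(download_status, fp)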
Example No. 13
    def handle(self, *args, **options):
        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")


        lang_code = lcode_to_ietf(options["lang_code"])
        if lang_code not in AVAILABLE_EXERCISE_LANGUAGE_CODES:
            logging.info("No exercises available for language %s" % lang_code)

        else:
            # Get list of exercises
            exercise_ids = options["exercise_ids"].split(",") if options["exercise_ids"] else None
            exercise_ids = exercise_ids or ([ex["id"] for ex in get_topic_exercises(topic_id=options["topic_id"])] if options["topic_id"] else None)
            exercise_ids = exercise_ids or get_node_cache("Exercise").keys()

            # Download the exercises
            for exercise_id in exercise_ids:
                scrape_exercise(exercise_id=exercise_id, lang_code=lang_code, force=options["force"])

        logging.info("Process complete.")
Example No. 14
def clear_subtitles_cache(lang_codes=None, locale_root=LOCALE_ROOT):
    """
    Language codes will be converted to django format (e.g. en_US)
    """
    lang_codes = lang_codes or get_langs_with_subtitles()
    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)

        # Clear the status file
        lm_file = get_lang_map_filepath(lang_code)
        download_status = softload_json(lm_file, raises=True)
        for key in download_status:
            download_status[key] = {u'downloaded': False, u'last_success': u'', u'last_attempt': u'', u'api_response': u''}
        with open(lm_file, "w") as fp:
            json.dump(download_status, fp)

        # Delete all srt files
        srt_path = get_srt_path(lang_code)
        if os.path.exists(srt_path):
            shutil.rmtree(srt_path)
Example No. 15
def validate_language_map(lang_codes):
    """
    This function will tell you any blockers that you'll hit while
    running this command.

    All srt languages must exist in the language map; missing languages
    will cause errors during command running (which can be long).
    This function avoids that problem by doing the above consistency check.
    """
    lang_codes = lang_codes or get_all_prepped_lang_codes()
    missing_langs = []
    for lang_code in lang_codes:
        try:
            get_language_name(lcode_to_ietf(lang_code), error_on_missing=True)
        except LanguageNotFoundError:
            missing_langs.append(lang_code)

    if missing_langs:
        logging.warn("Please add the following language codes to %s:\n\t%s" % (
            LANG_LOOKUP_FILEPATH, missing_langs,
        ))
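The collect-all-failures-then-warn-once pattern above is easy to isolate. A self-contained sketch with a stand-in lookup table and exception, since get_language_name and LanguageNotFoundError belong to the project:

class LanguageNotFoundError(Exception):
    pass  # stand-in for the project's exception type

KNOWN_LANGS = {"en": "English", "pt-BR": "Portuguese (Brazil)"}

def get_language_name_stub(code):
    if code not in KNOWN_LANGS:
        raise LanguageNotFoundError(code)
    return KNOWN_LANGS[code]

missing_langs = []
for lang_code in ["en", "xx", "pt-BR", "zz"]:
    try:
        get_language_name_stub(lang_code)
    except LanguageNotFoundError:
        missing_langs.append(lang_code)

if missing_langs:
    print("Please add the following language codes:\n\t%s" % missing_langs)  # ['xx', 'zz']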
Example No. 16
def scrape_exercise(exercise_id, lang_code, force=False):
    ietf_lang_code = lcode_to_ietf(lang_code)

    exercise_dest_filepath = get_exercise_filepath(exercise_id, lang_code=lang_code)
    exercise_localized_root = os.path.dirname(exercise_dest_filepath)

    if os.path.exists(exercise_dest_filepath) and not force:
        return

    exercise_url = "https://es.khanacademy.org/khan-exercises/exercises/%s.html?lang=%s" % (exercise_id, ietf_lang_code)
    logging.info("Retrieving exercise %s from %s" % (exercise_id, exercise_url))

    try:
        ensure_dir(exercise_localized_root)

        resp = requests.get(exercise_url)
        resp.raise_for_status()
        with open(exercise_dest_filepath, "wb") as fp:
            fp.write(resp.content)
    except Exception as e:
        logging.error("Failed to download %s: %s" % (exercise_url, e))
Example No. 17
def validate_language_map(lang_codes):
    """
    This function will tell you any blockers that you'll hit while
    running this command.

    All srt languages must exist in the language map; missing languages
    will cause errors during command running (which can be long).
    This function avoids that problem by doing the above consistency check.
    """
    lang_codes = lang_codes or get_all_prepped_lang_codes()
    missing_langs = []
    for lang_code in lang_codes:
        try:
            get_language_name(lcode_to_ietf(lang_code), error_on_missing=True)
        except LanguageNotFoundError:
            missing_langs.append(lang_code)

    if missing_langs:
        logging.warn("Please add the following language codes to %s:\n\t%s" % (
            LANG_LOOKUP_FILEPATH, missing_langs,
        ))
Example No. 18
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on distributed servers server.")

        lang_code = lcode_to_ietf(options["lang_code"])
        software_version = options["software_version"]
        logging.info("Downloading language pack for lang_code=%s, software_version=%s" % (lang_code, software_version))

        # Download the language pack
        try:
            if options['file']:
                self.start(_("Using local language pack '%(filepath)s'") % {"filepath": options['file']})
                zip_filepath = options['file']
            else:
                self.start(_("Downloading language pack '%(lang_code)s'") % {"lang_code": lang_code})
                zip_filepath = get_language_pack(lang_code, software_version, callback=self.cb)

            # Unpack into locale directory
            self.next_stage(_("Unpacking language pack '%(lang_code)s'") % {"lang_code": lang_code})
            unpack_language(lang_code, zip_filepath=zip_filepath)

            #
            self.next_stage(_("Creating static files for language pack '%(lang_code)s'") % {"lang_code": lang_code})
            update_jsi18n_file(lang_code)


            self.next_stage(_("Moving files to their appropriate local disk locations."))
            move_dubbed_video_map(lang_code)
            move_exercises(lang_code)
            move_srts(lang_code)
            move_video_sizes_file(lang_code)

            self.next_stage(_("Invalidate caches"))
            caching.invalidate_all_caches()

            self.complete(_("Finished processing language pack %(lang_code)s") % {"lang_code": lang_code})
        except Exception as e:
            self.cancel(stage_status="error", notes=_("Error: %(error_msg)s") % {"error_msg": unicode(e)})
            raise
Example No. 19
def update_videos(request, max_to_show=4):
    installed_languages = get_installed_language_packs(force=True).copy()  # we copy to avoid changing the original installed language list
    default_language_name = lang_best_name(
        installed_languages.pop(
            lcode_to_ietf(request.session["default_language"])))
    languages_to_show = [
        lang_best_name(l) for l in installed_languages.values()[:max_to_show]
    ]
    other_languages_count = max(0, len(installed_languages) - max_to_show)

    context = update_context(request)
    context.update({
        "video_count":
        VideoFile.objects.filter(percent_complete=100).count(),
        "languages":
        languages_to_show,
        "default_language_name":
        default_language_name,
        "other_languages_count":
        other_languages_count,
    })
    return context
Example No. 20
    def handle(self, *args, **options):
        if settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the distributed server.")

        if not options["lang_code"]:
            raise CommandError("You must specify a language code.")

        #
        ensure_dir(settings.CONTENT_ROOT)

        # Get list of videos
        lang_code = lcode_to_ietf(options["lang_code"])
        video_map = get_dubbed_video_map(lang_code) or {}
        video_ids = options["video_ids"].split(",") if options["video_ids"] else None
        video_ids = video_ids or ([vid["id"] for vid in get_topic_videos(topic_id=options["topic_id"])] if options["topic_id"] else None)
        video_ids = video_ids or video_map.keys()

        # Download the videos
        for video_id in video_ids:
            if video_id in video_map:
                youtube_id = video_map[video_id]

            elif video_id in video_map.values():
                # Perhaps they sent in a youtube ID?  We can handle that!
                youtube_id = video_id
            else:
                logging.error("No mapping for video_id=%s; skipping" % video_id)
                continue

            try:
                scrape_video(youtube_id=youtube_id, format=options["format"], force=options["force"])
                #scrape_thumbnail(youtube_id=youtube_id)
                logging.info("Access video %s at %s" % (youtube_id, get_node_cache("Video")[video_id][0]["path"]))
            except Exception as e:
                logging.error("Failed to download video %s: %s" % (youtube_id, e))

        logging.info("Process complete.")
Example No. 21
def download_srt_from_3rd_party(lang_codes=None, **kwargs):
    """Download subtitles specified by command line args"""

    lang_codes = lang_codes or get_all_prepped_lang_codes()
    bad_languages = {}

    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)
        lang_code = get_supported_language_map(lang_code)['amara']

        try:
            lang_map_filepath = get_lang_map_filepath(lang_code)
            if not os.path.exists(lang_map_filepath):
                videos = {}  # happens if no subtitle set is known for this language
            else:
                with open(lang_map_filepath, "r") as fp:
                    videos = json.load(fp)
        except Exception as e:
            error_msg = "Error in subtitles metadata file for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

        try:
            download_if_criteria_met(videos, lang_code=lang_code, **kwargs)
        except Exception as e:
            error_msg = "Error downloading subtitles for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

    # now report final results
    if bad_languages:
        outstr = "Failed to download subtitles for the following languages: %s" % (bad_languages.keys())
        outstr += "\n" + str(bad_languages)
        logging.error(outstr)
Example No. 22
def scrape_exercise(exercise_id, lang_code, force=False):
    ietf_lang_code = lcode_to_ietf(lang_code)

    exercise_dest_filepath = get_exercise_filepath(exercise_id,
                                                   lang_code=lang_code)
    exercise_localized_root = os.path.dirname(exercise_dest_filepath)

    if os.path.exists(exercise_dest_filepath) and not force:
        return

    exercise_url = "https://es.khanacademy.org/khan-exercises/exercises/%s.html?lang=%s" % (
        exercise_id, ietf_lang_code)
    logging.info("Retrieving exercise %s from %s" %
                 (exercise_id, exercise_url))

    try:
        ensure_dir(exercise_localized_root)

        resp = requests.get(exercise_url)
        resp.raise_for_status()
        with open(exercise_dest_filepath, "wb") as fp:
            fp.write(resp.content)
    except Exception as e:
        logging.error("Failed to download %s: %s" % (exercise_url, e))
Example No. 23
def download_srt_from_3rd_party(lang_codes=None, **kwargs):
    """Download subtitles specified by command line args"""

    lang_codes = lang_codes or get_all_prepped_lang_codes()
    bad_languages = {}

    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)
        lang_code = get_supported_language_map(lang_code)['amara']

        try:
            lang_map_filepath = get_lang_map_filepath(lang_code)
            if not os.path.exists(lang_map_filepath):
                videos = {}  # happens if no subtitle set is known for this language
            else:
                with open(lang_map_filepath, "r") as fp:
                    videos = json.load(fp)
        except Exception as e:
            error_msg = "Error in subtitles metadata file for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

        try:
            download_if_criteria_met(videos, lang_code=lang_code, **kwargs)
        except Exception as e:
            error_msg = "Error downloading subtitles for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

    # now report final results
    if bad_languages:
        outstr = "Failed to download subtitles for the following languages: %s" % (bad_languages.keys())
        outstr += "\n" + str(bad_languages)
        logging.error(outstr)
Example No. 24
def update_language_srt_map(map_file=SRTS_JSON_FILEPATH):
    """
    Translate the srts_remote_availability dictionary into language specific files
    that can be used by the cache_subtitles command.

    Note: srt map deals with amara, so uses ietf codes (e.g. en-us)
    """
    # Load the current download status
    api_info_map = softload_json(map_file, logger=logging.warn)

    # Next we want to iterate through those and create a big srt dictionary organized by language code
    remote_availability_map = {}
    for youtube_id, data in api_info_map.items():
        languages = data.get("language_codes", [])
        for lang_code in languages:
            lang_code = lcode_to_ietf(lang_code)
            if not lang_code in remote_availability_map:
                # logging.info("Creating language section '%s'" % lang_code)
                remote_availability_map[lang_code] = {}
            # This entry will be valid if it's new, otherwise it will be overwritten later
            remote_availability_map[lang_code][youtube_id] = {
                "downloaded": False,
                "api_response": "",
                "last_attempt": "",
                "last_success": "",
            }

    # Finally we need to iterate through that dictionary and create individual files for each language code
    for lang_code, new_data in remote_availability_map.items():

        # Try to open previous language file
        lang_map_filepath = get_lang_map_filepath(lang_code)
        if not os.path.exists(lang_map_filepath):
            lang_map = {}
        else:
            lang_map = softload_json(lang_map_filepath, logger=logging.error)

        # First, check to see if it's empty (e.g. no subtitles available for any videos)
        if not new_data:
            logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
            if os.path.exists(lang_map_filepath):
                os.remove(lang_map_filepath)
            continue

        # Compare how many empty entries you are adding and add them to master map
        old_yt_ids = set(new_data.keys())
        new_yt_ids = set(lang_map.keys())
        yt_ids_to_add = set(new_data.keys()) - set(lang_map.keys())
        yt_ids_to_delete = set(lang_map.keys()) - set(new_data.keys())

        if yt_ids_to_add:
            logging.info("Adding %d new YouTube IDs to language (%s)" % (len(yt_ids_to_add), lang_code))
            for yt_id in yt_ids_to_add:
                lang_map[yt_id] = new_data.get(yt_id)

        if yt_ids_to_delete:
            logging.info(
                "Deleting %d old YouTube IDs from language (%s) because they are no longer supported."
                % (len(yt_ids_to_delete), lang_code)
            )
            for yt_id in yt_ids_to_delete:
                lang_map.pop(yt_id, None)

        # Write the new file to the correct location
        logging.debug("Writing %s" % lang_map_filepath)
        ensure_dir(os.path.dirname(lang_map_filepath))
        with open(lang_map_filepath, "w") as outfile:
            json.dump(lang_map, outfile)

        # Update the big mapping with the most accurate numbers
        remote_availability_map[lang_code].update(lang_map)

    # Finally, remove any files not found in the current map at all.
    if lang_map_filepath:
        for filename in os.listdir(os.path.dirname(lang_map_filepath)):
            lang_code = filename.split("_")[0]
            if not lang_code in remote_availability_map:
                file_to_remove = get_lang_map_filepath(lang_code)
                logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
                if os.path.exists(file_to_remove):
                    os.remove(file_to_remove)
                else:
                    logging.warn(
                        "Subtitles metadata for %s not found; skipping deletion of non-existent file %s."
                        % (lang_code, file_to_remove)
                    )

    return remote_availability_map
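The add/delete reconciliation in update_language_srt_map is plain set arithmetic over the two sets of YouTube IDs. A minimal sketch with made-up IDs:

new_data = {"yt1": {}, "yt2": {}, "yt3": {}}  # freshly reported remote availability
lang_map = {"yt2": {}, "yt4": {}}             # contents of the existing language file

yt_ids_to_add = set(new_data.keys()) - set(lang_map.keys())     # {'yt1', 'yt3'}
yt_ids_to_delete = set(lang_map.keys()) - set(new_data.keys())  # {'yt4'}

for yt_id in yt_ids_to_add:
    lang_map[yt_id] = new_data.get(yt_id)
for yt_id in yt_ids_to_delete:
    lang_map.pop(yt_id, None)

print(sorted(lang_map.keys()))  # ['yt1', 'yt2', 'yt3']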
Example No. 25
def update_language_srt_map(map_file=SRTS_JSON_FILEPATH):
    """
    Translate the srts_remote_availability dictionary into language specific files
    that can be used by the cache_subtitles command.

    Note: srt map deals with amara, so uses ietf codes (e.g. en-us)
    """
    # Load the current download status
    api_info_map = softload_json(map_file, logger=logging.warn)

    # Next we want to iterate through those and create a big srt dictionary organized by language code
    remote_availability_map = {}
    for youtube_id, data in api_info_map.items():
        languages = data.get("language_codes", [])
        for lang_code in languages:
            lang_code = lcode_to_ietf(lang_code)
            if not lang_code in remote_availability_map:
                #logging.info("Creating language section '%s'" % lang_code)
                remote_availability_map[lang_code] = {}
            # This entry will be valid if it's new, otherwise it will be overwritten later
            remote_availability_map[lang_code][youtube_id] = {
                "downloaded": False,
                "api_response": "",
                "last_attempt": "",
                "last_success": "",
            }

    # Finally we need to iterate through that dictionary and create individual files for each language code
    for lang_code, new_data in remote_availability_map.items():

        # Try to open previous language file
        lang_map_filepath = get_lang_map_filepath(lang_code)
        if not os.path.exists(lang_map_filepath):
            lang_map = {}
        else:
            lang_map = softload_json(lang_map_filepath, logger=logging.error)

        # First, check to see if it's empty (e.g. no subtitles available for any videos)
        if not new_data:
            logging.info(
                "Subtitle support for %s has been terminated; removing." %
                lang_code)
            if os.path.exists(lang_map_filepath):
                os.remove(lang_map_filepath)
            continue

        # Compare how many empty entries you are adding and add them to master map
        old_yt_ids = set(new_data.keys())
        new_yt_ids = set(lang_map.keys())
        yt_ids_to_add = set(new_data.keys()) - set(lang_map.keys())
        yt_ids_to_delete = set(lang_map.keys()) - set(new_data.keys())

        if yt_ids_to_add:
            logging.info("Adding %d new YouTube IDs to language (%s)" %
                         (len(yt_ids_to_add), lang_code))
            for yt_id in yt_ids_to_add:
                lang_map[yt_id] = new_data.get(yt_id)

        if yt_ids_to_delete:
            logging.info(
                "Deleting %d old YouTube IDs from language (%s) because they are no longer supported."
                % (len(yt_ids_to_delete), lang_code))
            for yt_id in yt_ids_to_delete:
                lang_map.pop(yt_id, None)

        # Write the new file to the correct location
        logging.debug("Writing %s" % lang_map_filepath)
        ensure_dir(os.path.dirname(lang_map_filepath))
        with open(lang_map_filepath, 'w') as outfile:
            json.dump(lang_map, outfile)

        # Update the big mapping with the most accurate numbers
        remote_availability_map[lang_code].update(lang_map)

    # Finally, remove any files not found in the current map at all.
    if lang_map_filepath:
        for filename in os.listdir(os.path.dirname(lang_map_filepath)):
            lang_code = filename.split("_")[0]
            if not lang_code in remote_availability_map:
                file_to_remove = get_lang_map_filepath(lang_code)
                logging.info(
                    "Subtitle support for %s has been terminated; removing." %
                    lang_code)
                if os.path.exists(file_to_remove):
                    os.remove(file_to_remove)
                else:
                    logging.warn(
                        "Subtitles metadata for %s not found; skipping deletion of non-existent file %s."
                        % (lang_code, file_to_remove))

    return remote_availability_map