Пример #1
0
def update_json(youtube_id, lang_code, downloaded, api_response, time_of_attempt):
    """Update language_srt_map to reflect download status

    lang_code in IETF format
    """
    # Open JSON file
    filepath = get_lang_map_filepath(lang_code)
    language_srt_map = softload_json(filepath, logger=logging.error)
    if not language_srt_map:
        return False

    # create updated entry
    entry = language_srt_map[youtube_id]
    entry["downloaded"] = downloaded
    entry["api_response"] = api_response
    entry["last_attempt"] = time_of_attempt
    if api_response == "success":
        entry["last_success"] = time_of_attempt

    # update full-size JSON with new information
    language_srt_map[youtube_id].update(entry)

    # write it to file
    json_file = open(filepath, "wb")
    json_file.write(json.dumps(language_srt_map))
    json_file.close()
    logging.debug("File updated.")

    return True
def update_json(youtube_id, lang_code, downloaded, api_response, time_of_attempt):
    """Update language_srt_map to reflect download status

    lang_code in IETF format
    """
    # Open JSON file
    filepath = get_lang_map_filepath(lang_code)
    language_srt_map = softload_json(filepath, logger=logging.error)
    if not language_srt_map:
        return False

    # create updated entry
    entry = language_srt_map[youtube_id]
    entry["downloaded"] = downloaded
    entry["api_response"] = api_response
    entry["last_attempt"] = time_of_attempt
    if api_response == "success":
        entry["last_success"] = time_of_attempt

    # update full-size JSON with new information
    language_srt_map[youtube_id].update(entry)

    # write it to file
    json_file = open(filepath, "wb")
    json_file.write(json.dumps(language_srt_map))
    json_file.close()
    logging.debug("File updated.")

    return True
def clear_subtitles_cache(lang_codes=None, locale_root=LOCALE_ROOT):
    """
    Language codes will be converted to django format (e.g. en_US)
    """
    lang_codes = lang_codes or get_langs_with_subtitles()
    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)

        # Clear the status file
        lm_file = get_lang_map_filepath(lang_code)
        download_status = softload_json(lm_file, raises=True)
        for key in download_status:
            download_status[key] = {u'downloaded': False, u'last_success': u'', u'last_attempt': u'', u'api_response': u''}
        with open(lm_file, "w") as fp:
            json.dump(download_status, fp)

        # Delete all srt files
        srt_path = get_srt_path(lang_code)
        if os.path.exists(srt_path):
            shutil.rmtree(srt_path)
Пример #4
0
def clear_subtitles_cache(lang_codes=None, locale_root=LOCALE_ROOT):
    """
    Language codes will be converted to django format (e.g. en_US)
    """
    lang_codes = lang_codes or get_langs_with_subtitles()
    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)

        # Clear the status file
        lm_file = get_lang_map_filepath(lang_code)
        download_status = softload_json(lm_file, raises=True)
        for key in download_status:
            download_status[key] = {u'downloaded': False, u'last_success': u'', u'last_attempt': u'', u'api_response': u''}
        with open(lm_file, "w") as fp:
            json.dump(download_status, fp)

        # Delete all srt files
        srt_path = get_srt_path(lang_code)
        if os.path.exists(srt_path):
            shutil.rmtree(srt_path)
def download_srt_from_3rd_party(lang_codes=None, **kwargs):
    """Download subtitles specified by command line args"""

    lang_codes = lang_codes or get_all_prepped_lang_codes()
    bad_languages = {}

    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)
        lang_code = get_supported_language_map(lang_code)['amara']

        try:
            lang_map_filepath = get_lang_map_filepath(lang_code)
            if not os.path.exists(lang_map_filepath):
                videos = {}  # happens if an unknown set for subtitles.
            else:
                with open(lang_map_filepath, "r") as fp:
                    videos = json.load(fp)
        except Exception as e:
            error_msg = "Error in subtitles metadata file for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

        try:
            download_if_criteria_met(videos, lang_code=lang_code, **kwargs)
        except Exception as e:
            error_msg = "Error downloading subtitles for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

    # now report final results
    if bad_languages:
        outstr = "Failed to download subtitles for the following languages: %s" % (bad_languages.keys())
        outstr += "\n" + str(bad_languages)
        logging.error(outstr)
Пример #6
0
def download_srt_from_3rd_party(lang_codes=None, **kwargs):
    """Download subtitles specified by command line args"""

    lang_codes = lang_codes or get_all_prepped_lang_codes()
    bad_languages = {}

    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)
        lang_code = get_supported_language_map(lang_code)['amara']

        try:
            lang_map_filepath = get_lang_map_filepath(lang_code)
            if not os.path.exists(lang_map_filepath):
                videos = {}  # happens if an unknown set for subtitles.
            else:
                with open(lang_map_filepath, "r") as fp:
                    videos = json.load(fp)
        except Exception as e:
            error_msg = "Error in subtitles metadata file for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

        try:
            download_if_criteria_met(videos, lang_code=lang_code, **kwargs)
        except Exception as e:
            error_msg = "Error downloading subtitles for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            bad_languages[lang_code] = error_msg
            continue

    # now report final results
    if bad_languages:
        outstr = "Failed to download subtitles for the following languages: %s" % (bad_languages.keys())
        outstr += "\n" + str(bad_languages)
        logging.error(outstr)
Пример #7
0
class Command(BaseCommand):
    help = "Update the mapping of subtitles available by language for each video. Location: %s" % (
        get_lang_map_filepath("<lang_code>"), )

    option_list = BaseCommand.option_list + (
        # Basic options
        make_option(
            '-f',
            '--force',
            action='store_true',
            dest='force',
            default=False,
            help=
            "Force a new mapping. Cannot be run with other options. Fetches new data for every one of our videos and overwrites current data with fresh data from Amara. Should really only ever be run once, because data can be updated from then on with '-s all'.",
            metavar="FORCE"),
        make_option(
            '-r',
            '--response-code',
            action='store',
            dest='response_code',
            default=None,
            help=
            "Which api-response code to recheck. Can be combined with -d. USAGE: '-r all', '-r client-error', or '-r server-error' (default: None (only download new video info)).",
            metavar="RESPONSE_CODE"),
        make_option(
            '-d',
            '--date-since-attempt',
            action='store',
            dest='date_since_attempt',
            default=None,
            help=
            "Setting a date flag will update only those entries which have not been attempted since that date. Can be combined with -r. This could potentially be useful for updating old subtitles. USAGE: '-d MM/DD/YYYY'"
        ),
        make_option(
            '-y',
            '--days-since-attempt',
            action='store',
            dest='days_since_attempt',
            default=1,
            help=
            "Setting # of days since last attempt; will compute date.  USAGE: '-y 1'"
        ),
    )

    def handle(self, *args, **options):
        if not settings.CENTRAL_SERVER:
            raise CommandError("This must only be run on the central server.")

        # Set up the refresh date
        if not options["date_since_attempt"]:
            date_since_attempt = datetime.datetime.now() - datetime.timedelta(
                days=options["days_since_attempt"])
            options["date_since_attempt"] = date_since_attempt.strftime(
                "%m/%d/%Y")
        converted_date = convert_date_input(options.get("date_since_attempt"))

        updated_mappings = create_all_mappings(
            force=options.get("force"),
            frequency_to_save=5,
            response_to_check=options.get("response_code"),
            date_to_check=converted_date)
        logging.info(
            "Executed successfully. Updating language => subtitle mapping to record any changes!"
        )

        if updated_mappings:
            language_srt_map = update_language_srt_map()
            print_language_availability_table(language_srt_map)

        logging.info("Process complete.")
Пример #8
0
def update_language_srt_map(map_file=SRTS_JSON_FILEPATH):
    """
    Translate the srts_remote_availability dictionary into language specific files
    that can be used by the cache_subtitles command.

    Note: srt map deals with amara, so uses ietf codes (e.g. en-us)
    """
    # Load the current download status
    api_info_map = softload_json(map_file, logger=logging.warn)

    # Next we want to iterate through those and create a big srt dictionary organized by language code
    remote_availability_map = {}
    for youtube_id, data in api_info_map.items():
        languages = data.get("language_codes", [])
        for lang_code in languages:
            lang_code = lcode_to_ietf(lang_code)
            if not lang_code in remote_availability_map:
                #logging.info("Creating language section '%s'" % lang_code)
                remote_availability_map[lang_code] = {}
            # This entry will be valid if it's new, otherwise it will be overwitten later
            remote_availability_map[lang_code][youtube_id] = {
                "downloaded": False,
                "api_response": "",
                "last_attempt": "",
                "last_success": "",
            }

    # Finally we need to iterate through that dictionary and create individual files for each language code
    for lang_code, new_data in remote_availability_map.items():

        # Try to open previous language file
        lang_map_filepath = get_lang_map_filepath(lang_code)
        if not os.path.exists(lang_map_filepath):
            lang_map = {}
        else:
            lang_map = softload_json(lang_map_filepath, logger=logging.error)

        # First, check to see if it's empty (e.g. no subtitles available for any videos)
        if not new_data:
            logging.info(
                "Subtitle support for %s has been terminated; removing." %
                lang_code)
            if os.path.exists(lang_map_filepath):
                os.remove(lang_map_filepath)
            continue

        # Compare how many empty entries you are adding and add them to master map
        old_yt_ids = set(new_data.keys())
        new_yt_ids = set(lang_map.keys())
        yt_ids_to_add = set(new_data.keys()) - set(lang_map.keys())
        yt_ids_to_delete = set(lang_map.keys()) - set(new_data.keys())

        if yt_ids_to_add:
            logging.info("Adding %d new YouTube IDs to language (%s)" %
                         (len(yt_ids_to_add), lang_code))
            for yt_id in yt_ids_to_add:
                lang_map[yt_id] = new_data.get(yt_id)

        if yt_ids_to_delete:
            logging.info(
                "Deleting %d old YouTube IDs from language (%s) because they are no longer supported."
                % (len(yt_ids_to_delete), lang_code))
            for yt_id in yt_ids_to_delete:
                lang_map.pop(yt_id, None)

        # Write the new file to the correct location
        logging.debug("Writing %s" % lang_map_filepath)
        ensure_dir(os.path.dirname(lang_map_filepath))
        with open(lang_map_filepath, 'w') as outfile:
            json.dump(lang_map, outfile)

        # Update the big mapping with the most accurate numbers
        remote_availability_map[lang_code].update(lang_map)

    # Finally, remove any files not found in the current map at all.
    if lang_map_filepath:
        for filename in os.listdir(os.path.dirname(lang_map_filepath)):
            lang_code = lang_code = filename.split("_")[0]
            if not lang_code in remote_availability_map:
                file_to_remove = get_lang_map_filepath(lang_code)
                logging.info(
                    "Subtitle support for %s has been terminated; removing." %
                    lang_code)
                if os.path.exists(file_to_remove):
                    os.remove(file_to_remove)
                else:
                    logging.warn(
                        "Subtitles metadata for %s not found; skipping deletion of non-existent file %s."
                        % (lang_code, file_to_remove))

    return remote_availability_map
Пример #9
0
def update_language_srt_map(map_file=SRTS_JSON_FILEPATH):
    """
    Translate the srts_remote_availability dictionary into language specific files
    that can be used by the cache_subtitles command.

    Note: srt map deals with amara, so uses ietf codes (e.g. en-us)
    """
    # Load the current download status
    api_info_map = softload_json(map_file, logger=logging.warn)

    # Next we want to iterate through those and create a big srt dictionary organized by language code
    remote_availability_map = {}
    for youtube_id, data in api_info_map.items():
        languages = data.get("language_codes", [])
        for lang_code in languages:
            lang_code = lcode_to_ietf(lang_code)
            if not lang_code in remote_availability_map:
                # logging.info("Creating language section '%s'" % lang_code)
                remote_availability_map[lang_code] = {}
            # This entry will be valid if it's new, otherwise it will be overwitten later
            remote_availability_map[lang_code][youtube_id] = {
                "downloaded": False,
                "api_response": "",
                "last_attempt": "",
                "last_success": "",
            }

    # Finally we need to iterate through that dictionary and create individual files for each language code
    for lang_code, new_data in remote_availability_map.items():

        # Try to open previous language file
        lang_map_filepath = get_lang_map_filepath(lang_code)
        if not os.path.exists(lang_map_filepath):
            lang_map = {}
        else:
            lang_map = softload_json(lang_map_filepath, logger=logging.error)

        # First, check to see if it's empty (e.g. no subtitles available for any videos)
        if not new_data:
            logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
            if os.path.exists(lang_map_filepath):
                os.remove(lang_map_filepath)
            continue

        # Compare how many empty entries you are adding and add them to master map
        old_yt_ids = set(new_data.keys())
        new_yt_ids = set(lang_map.keys())
        yt_ids_to_add = set(new_data.keys()) - set(lang_map.keys())
        yt_ids_to_delete = set(lang_map.keys()) - set(new_data.keys())

        if yt_ids_to_add:
            logging.info("Adding %d new YouTube IDs to language (%s)" % (len(yt_ids_to_add), lang_code))
            for yt_id in yt_ids_to_add:
                lang_map[yt_id] = new_data.get(yt_id)

        if yt_ids_to_delete:
            logging.info(
                "Deleting %d old YouTube IDs from language (%s) because they are no longer supported."
                % (len(yt_ids_to_delete), lang_code)
            )
            for yt_id in yt_ids_to_delete:
                lang_map.pop(yt_id, None)

        # Write the new file to the correct location
        logging.debug("Writing %s" % lang_map_filepath)
        ensure_dir(os.path.dirname(lang_map_filepath))
        with open(lang_map_filepath, "w") as outfile:
            json.dump(lang_map, outfile)

        # Update the big mapping with the most accurate numbers
        remote_availability_map[lang_code].update(lang_map)

    # Finally, remove any files not found in the current map at all.
    if lang_map_filepath:
        for filename in os.listdir(os.path.dirname(lang_map_filepath)):
            lang_code = lang_code = filename.split("_")[0]
            if not lang_code in remote_availability_map:
                file_to_remove = get_lang_map_filepath(lang_code)
                logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
                if os.path.exists(file_to_remove):
                    os.remove(file_to_remove)
                else:
                    logging.warn(
                        "Subtitles metadata for %s not found; skipping deletion of non-existent file %s."
                        % (lang_code, file_to_remove)
                    )

    return remote_availability_map