def download_srt_from_3rd_party(lang_codes=None, **kwargs):
    """Download subtitles for every requested language.

    Each code in `lang_codes` (or, when none are given, every code returned
    by get_all_prepped_lang_codes()) is normalized with lcode_to_ietf. The
    language's subtitle map file is then loaded and handed to
    download_if_criteria_met together with any extra keyword arguments.

    A failure for one language is logged and remembered, and processing
    moves on to the next language; a summary of all failures is logged at
    the end. Nothing is returned.
    """
    failures = {}

    for requested_code in (lang_codes or get_all_prepped_lang_codes()):
        lang_code = lcode_to_ietf(requested_code)

        # Step 1: load the per-language metadata describing the videos.
        try:
            map_path = get_lang_map_filepath(lang_code)
            if os.path.exists(map_path):
                with open(map_path, "r") as fp:
                    videos = json.load(fp)
            else:
                # No map file for this language: treat it as having no videos.
                videos = {}
        except Exception as e:
            error_msg = "Error in subtitles metadata file for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            failures[lang_code] = error_msg
            continue

        # Step 2: attempt the actual downloads for this language.
        try:
            download_if_criteria_met(videos, lang_code=lang_code, **kwargs)
        except Exception as e:
            error_msg = "Error downloading subtitles for %s: %s" % (lang_code, e)
            logging.error(error_msg)
            failures[lang_code] = error_msg

    # Step 3: one consolidated report covering every language that failed.
    if failures:
        headline = "Failed to download subtitles for the following languages: %s" % (failures.keys())
        logging.error("\n".join([headline, str(failures)]))
def update_json(youtube_id, lang_code, downloaded, api_response, time_of_attempt):
    """Update language_srt_map to reflect download status

    lang_code in IETF format

    The language's map file (located via get_lang_map_filepath) is read,
    the entry for `youtube_id` is updated with the outcome of the latest
    download attempt, and the whole map is written back to the same file.

    Args:
        youtube_id: key of the entry to update in the language map.
        lang_code: language whose map file should be updated.
        downloaded: value stored under the entry's "downloaded" key.
        api_response: value stored under "api_response"; when it equals
            "success", "last_success" is also set to `time_of_attempt`.
        time_of_attempt: value stored under "last_attempt".

    Returns:
        True once the file has been rewritten; False if the map file could
        not be opened or parsed (the error is logged).

    Raises:
        KeyError: if `youtube_id` has no entry in the language map.
    """
    # Open JSON file
    filepath = get_lang_map_filepath(lang_code)
    try:
        with open(filepath, "r") as fp:
            language_srt_map = json.load(fp)
    except Exception as e:
        logging.error("Something went wrong while trying to open the json file (%s): %s" % (filepath, e))
        return False

    # Update the entry in place; `entry` is the very dict stored in the map,
    # so no separate merge back into language_srt_map is required.
    entry = language_srt_map[youtube_id]
    entry["downloaded"] = downloaded
    entry["api_response"] = api_response
    entry["last_attempt"] = time_of_attempt
    if api_response == "success":
        entry["last_success"] = time_of_attempt

    # Write it to file. The context manager guarantees the handle is closed
    # even if serialization fails, and text mode matches what json.dump emits.
    with open(filepath, "w") as fp:
        json.dump(language_srt_map, fp)
    logging.debug("File updated.")

    return True
def clear_subtitles_cache(lang_codes=None, locale_root=LOCALE_ROOT):
    """Reset subtitle download status and delete downloaded srt files.

    Each language code is normalized with lcode_to_ietf before use.

    Args:
        lang_codes: iterable of language codes to clear. When empty or None,
            every entry returned by os.listdir(locale_root) is used.
        locale_root: directory listed to discover language codes when
            `lang_codes` is not given.

    For every language, each entry in its status map file is reset to
    "not downloaded" with empty timestamps/response, and the directory
    returned by get_srt_path is removed if it exists. A language with no
    status map file is not an error: the reset is skipped and its srt
    directory is still removed.

    Raises:
        Whatever open() / json.load() raise for an existing but unreadable
        or malformed status file.
    """
    lang_codes = lang_codes or os.listdir(locale_root)
    for lang_code in lang_codes:
        lang_code = lcode_to_ietf(lang_code)

        # Clear the status file. Entries under locale_root do not necessarily
        # have one, so a missing file must not abort the whole run.
        lm_file = get_lang_map_filepath(lang_code)
        if os.path.exists(lm_file):
            with open(lm_file, "r") as fp:
                download_status = json.load(fp)
            for key in download_status:
                download_status[key] = {
                    u"downloaded": False,
                    u"last_success": u"",
                    u"last_attempt": u"",
                    u"api_response": u"",
                }
            with open(lm_file, "w") as fp:
                json.dump(download_status, fp)
        else:
            logging.debug("No subtitle status file for %s (%s); nothing to reset." % (lang_code, lm_file))

        # Delete all srt files
        srt_path = get_srt_path(lang_code)
        if os.path.exists(srt_path):
            shutil.rmtree(srt_path)
def update_language_srt_map(map_file=SRTS_JSON_FILEPATH):
    """
    Translate the srts_remote_availability dictionary into language specific files
    that can be used by the cache_subtitles command.

    Note: srt map deals with amara, so uses ietf codes (e.g. en-us)

    Args:
        map_file: path of the JSON file mapping each YouTube ID to a dict
            whose "language_codes" list names the languages with subtitles.
            If it cannot be read or parsed, an empty map is used instead.

    Returns:
        dict mapping language code -> {youtube_id: status dict}, where the
        status of IDs already present in a language's file is preserved and
        new IDs get a blank "not downloaded" status.
    """
    # Load the current download status
    try:
        with open(map_file) as fp:
            api_info_map = json.load(fp)
    except Exception as e:
        # Must be corrupted; start from scratch!
        logging.warning("Could not open %s for updates; starting from scratch. Error=%s" % (map_file, e))
        api_info_map = {}

    # Next we want to iterate through those and create a big srt dictionary organized by language code
    remote_availability_map = {}
    for youtube_id, data in api_info_map.items():
        for lang_code in data.get("language_codes", []):
            lang_code = lcode_to_ietf(lang_code)
            # This entry will be valid if it's new, otherwise it will be overwritten later
            remote_availability_map.setdefault(lang_code, {})[youtube_id] = {
                "downloaded": False,
                "api_response": "",
                "last_attempt": "",
                "last_success": "",
            }

    # Finally we need to iterate through that dictionary and create individual files for each language code.
    # lang_map_filepath stays None when there are no languages at all, so the
    # cleanup step below can be skipped instead of hitting an unbound local.
    lang_map_filepath = None
    for lang_code, new_data in remote_availability_map.items():

        # Try to open previous language file
        lang_map_filepath = get_lang_map_filepath(lang_code)
        if not os.path.exists(lang_map_filepath):
            lang_map = {}
        else:
            try:
                with open(lang_map_filepath, "r") as fp:
                    lang_map = json.load(fp)
            except Exception as e:
                logging.error("Language download status mapping for (%s) is corrupted (%s), rewriting it." % (lang_code, e))
                lang_map = {}

        # First, check to see if it's empty (e.g. no subtitles available for any videos)
        if not new_data:
            logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
            if os.path.exists(lang_map_filepath):
                os.remove(lang_map_filepath)
            continue

        # Work out which IDs are newly available and which are no longer offered
        yt_ids_to_add = set(new_data) - set(lang_map)
        yt_ids_to_delete = set(lang_map) - set(new_data)

        if yt_ids_to_add:
            logging.info("Adding %d new YouTube IDs to language (%s)" % (len(yt_ids_to_add), lang_code))
            for yt_id in yt_ids_to_add:
                lang_map[yt_id] = new_data.get(yt_id)

        if yt_ids_to_delete:
            logging.info("Deleting %d old YouTube IDs from language (%s) because they are no longer supported." % (len(yt_ids_to_delete), lang_code))
            for yt_id in yt_ids_to_delete:
                lang_map.pop(yt_id, None)

        # Write the new file to the correct location
        logging.debug("Writing %s" % lang_map_filepath)
        ensure_dir(os.path.dirname(lang_map_filepath))
        with open(lang_map_filepath, 'w') as outfile:
            json.dump(lang_map, outfile)

        # Update the big mapping with the most accurate numbers
        remote_availability_map[lang_code].update(lang_map)

    # Finally, remove any files not found in the current map at all.
    # The directory is taken from the last language file path handled above.
    if lang_map_filepath:
        for filename in os.listdir(os.path.dirname(lang_map_filepath)):
            # The language code is the part of the file name before the first "_"
            lang_code = filename.split("_")[0]
            if lang_code not in remote_availability_map:
                file_to_remove = get_lang_map_filepath(lang_code)
                logging.info("Subtitle support for %s has been terminated; removing." % lang_code)
                if os.path.exists(file_to_remove):
                    os.remove(file_to_remove)
                else:
                    logging.warning("Subtitles metadata for %s not found; skipping deletion of non-existent file %s." % (lang_code, file_to_remove))

    return remote_availability_map