def create_random_video_file(self):
    """
    Test helper: create an empty fake video file under CONTENT_ROOT.

    Picks the first known video ID, touches "<video_id>.mp4" in
    settings.CONTENT_ROOT, asserts the file now exists, and returns a
    tuple (fake_video_file_path, video_id).
    """
    # Deterministically pick the first known video ID (Python 2: keys()
    # returns a list, so [0] is valid).
    video_id = ID2SLUG_MAP.keys()[0]
    fake_video_file = os.path.join(settings.CONTENT_ROOT, "%s.mp4" % video_id)

    # Opening in "w" mode creates (or truncates) the file; the empty write
    # just makes the intent explicit -- content is irrelevant for the tests.
    with open(fake_video_file, "w") as fh:
        fh.write("")

    self.assertTrue(os.path.exists(fake_video_file), "Make sure the video file was created, video_id='%s'." % video_id)
    return (fake_video_file, video_id)
def generate_dubbed_video_mappings(download_url=DUBBED_VIDEOS_SPREADSHEET_CSV_URL, csv_data=None):
    """
    Build the dubbed-videos mapping from the master CSV spreadsheet.

    Downloads the CSV from download_url unless csv_data is supplied directly
    (e.g. by tests or a cached copy).

    Returns a tuple (video_map, csv_data) where video_map is a two-level
    dict: language name -> {english youtube ID -> dubbed youtube ID}.

    Raises CommandError if the spreadsheet download fails.
    """
    if not csv_data:
        logging.info("Downloading dubbed video data from %s" % download_url)
        response = requests.get(download_url)
        if response.status_code != 200:
            # BUG FIX: requests' Response object has no `.status` attribute;
            # the original raised AttributeError here instead of CommandError.
            raise CommandError("Failed to download dubbed video CSV data: status=%s" % response.status_code)
        csv_data = response.content

    # This CSV file is in standard format: separated by ",", quoted by '"'
    logging.info("Parsing csv file.")
    reader = csv.reader(StringIO(csv_data))

    # Build a two-level video map.
    #   First key:  language name
    #   Second key: english youtube ID
    #   Value:      corresponding youtube ID in the new language.
    video_map = {}

    row_num = -1
    try:
        # Loop through each row in the spreadsheet.
        while True:
            row_num += 1
            # Builtin next() works on Python 2.6+ and 3.x (reader.next() is 2-only).
            row = next(reader)

            if row_num < 5:
                # Rows 1-4 are preamble junk; skip them.
                continue
            elif row_num == 5:
                # Row 5 is the header row; lowercase all values (including
                # language names, which become the first-level map keys).
                header_row = [v.lower() for v in row]
                # NOTE: list.index() raises ValueError when missing, so these
                # asserts are belt-and-braces documentation of the contract.
                slug_idx = header_row.index("titled id")
                english_idx = header_row.index("english")
                assert slug_idx != -1, "Video slug column header should be found."
                assert english_idx != -1, "English video column header should be found."
            else:
                # Rows 6 and beyond are data.
                assert len(row) == len(header_row), "Values line length equals headers line length"

                # Grab the slug and english video ID.
                video_slug = row[slug_idx]
                english_video_id = row[english_idx]
                assert english_video_id, "English Video ID should not be empty"
                assert video_slug, "Slug should not be empty"

                # The english column is the first video-ID column; every column
                # from there to the end of the row is a language.  Record each
                # non-empty dubbed video ID under its language.
                for idx in range(english_idx, len(row)):
                    if row[idx]:  # make sure there's a dubbed video
                        lang = header_row[idx]
                        if lang not in video_map:  # add the first level if it doesn't exist
                            video_map[lang] = {}
                        video_map[lang][english_video_id] = row[idx]

    except StopIteration:
        # The reader raises StopIteration at end-of-file; parsing is done.
        pass

    # Now, validate the mappings against our known topic data.
    missing_videos = set(ID2SLUG_MAP.keys()) - set(video_map["english"].keys())
    extra_videos = set(video_map["english"].keys()) - set(ID2SLUG_MAP.keys())
    if missing_videos:
        logging.warn("There are %d known videos not in the list of dubbed videos" % len(missing_videos))
    if extra_videos:
        logging.warn("There are %d videos in the list of dubbed videos that we have never heard of." % len(extra_videos))

    return (video_map, csv_data)
def generate_dubbed_video_mappings(download_url=DUBBED_VIDEOS_SPREADSHEET_CSV_URL, csv_data=None):
    """
    Build the dubbed-videos mapping from the master CSV spreadsheet.

    NOTE(review): this is a byte-for-byte duplicate of an identical function
    defined earlier in this file; the later definition shadows the earlier
    one at import time.  One of the two copies should be removed.

    Downloads the CSV from download_url unless csv_data is supplied directly.

    Returns a tuple (video_map, csv_data) where video_map is a two-level
    dict: language name -> {english youtube ID -> dubbed youtube ID}.

    Raises CommandError if the spreadsheet download fails.
    """
    if not csv_data:
        logging.info("Downloading dubbed video data from %s" % download_url)
        response = requests.get(download_url)
        if response.status_code != 200:
            # BUG FIX: requests' Response object has no `.status` attribute;
            # the original raised AttributeError here instead of CommandError.
            raise CommandError("Failed to download dubbed video CSV data: status=%s" % response.status_code)
        csv_data = response.content

    # This CSV file is in standard format: separated by ",", quoted by '"'
    logging.info("Parsing csv file.")
    reader = csv.reader(StringIO(csv_data))

    # Build a two-level video map.
    #   First key:  language name
    #   Second key: english youtube ID
    #   Value:      corresponding youtube ID in the new language.
    video_map = {}

    row_num = -1
    try:
        # Loop through each row in the spreadsheet.
        while True:
            row_num += 1
            # Builtin next() works on Python 2.6+ and 3.x (reader.next() is 2-only).
            row = next(reader)

            if row_num < 5:
                # Rows 1-4 are preamble junk; skip them.
                continue
            elif row_num == 5:
                # Row 5 is the header row; lowercase all values (including
                # language names, which become the first-level map keys).
                header_row = [v.lower() for v in row]
                # NOTE: list.index() raises ValueError when missing, so these
                # asserts are belt-and-braces documentation of the contract.
                slug_idx = header_row.index("titled id")
                english_idx = header_row.index("english")
                assert slug_idx != -1, "Video slug column header should be found."
                assert english_idx != -1, "English video column header should be found."
            else:
                # Rows 6 and beyond are data.
                assert len(row) == len(header_row), "Values line length equals headers line length"

                # Grab the slug and english video ID.
                video_slug = row[slug_idx]
                english_video_id = row[english_idx]
                assert english_video_id, "English Video ID should not be empty"
                assert video_slug, "Slug should not be empty"

                # The english column is the first video-ID column; every column
                # from there to the end of the row is a language.  Record each
                # non-empty dubbed video ID under its language.
                for idx in range(english_idx, len(row)):
                    if row[idx]:  # make sure there's a dubbed video
                        lang = header_row[idx]
                        if lang not in video_map:  # add the first level if it doesn't exist
                            video_map[lang] = {}
                        video_map[lang][english_video_id] = row[idx]

    except StopIteration:
        # The reader raises StopIteration at end-of-file; parsing is done.
        pass

    # Now, validate the mappings against our known topic data.
    missing_videos = set(ID2SLUG_MAP.keys()) - set(video_map["english"].keys())
    extra_videos = set(video_map["english"].keys()) - set(ID2SLUG_MAP.keys())
    if missing_videos:
        logging.warn("There are %d known videos not in the list of dubbed videos" % len(missing_videos))
    if extra_videos:
        logging.warn("There are %d videos in the list of dubbed videos that we have never heard of." % len(extra_videos))

    return (video_map, csv_data)