def test_rename_transcript_success(self, edx_video_id):
    """
    Exercise "use current transcript" from the video component basic tab.

    The scenario is run for both kinds of video component:
        1. External video component (`edx_video_id` is empty)
        2. VEDA produced video component (`edx_video_id` is the VAL video ID)
    """
    # Arrange: store the parametrized `edx_video_id` on the component.
    self.item.edx_video_id = edx_video_id
    modulestore().update_item(self.item, self.user.id)

    # Act: use the current transcript from contentstore.
    response = self.rename_transcript(self.video_usage_key)

    # Assert: the endpoint reports success.
    self.assert_response(response, expected_status_code=200, expected_message='Success')

    # Assert: the component carries the preset id, or otherwise the
    # `edx_video_id` reported in the response.
    payload = json.loads(response.content.decode('utf-8'))
    expected_edx_video_id = edx_video_id or payload['edx_video_id']
    stored_video = modulestore().get_item(self.video_usage_key)
    self.assertEqual(stored_video.edx_video_id, expected_edx_video_id)

    # Assert: the transcript content retrieved for the video equals the
    # fixture subs.
    stored_transcript = get_video_transcript_content(stored_video.edx_video_id, language_code=u'en')
    raw_content = stored_transcript['content'].decode('utf-8')
    actual_sjson_content = json.loads(raw_content)
    expected_sjson_content = json.loads(self.sjson_subs)
    self.assertDictEqual(actual_sjson_content, expected_sjson_content)
def test_replace_transcript_success(self, edx_video_id):
    """
    Exercise "import from youtube" from the video component basic tab.

    The scenario is run for both kinds of video component:
        1. External video component (`edx_video_id` is empty)
        2. VEDA produced video component (`edx_video_id` is the VAL video ID)
    """
    # Arrange: store the parametrized `edx_video_id` on the component.
    self.item.edx_video_id = edx_video_id
    modulestore().update_item(self.item, self.user.id)

    # Act: replace the transcripts with the ones from youtube.
    response = self.replace_transcript(self.video_usage_key, self.youtube_id)

    # Assert: the endpoint reports success.
    self.assert_response(response, expected_status_code=200, expected_message='Success')

    # Assert: the component carries the preset id, or otherwise the
    # `edx_video_id` reported in the response.
    payload = json.loads(response.content)
    expected_edx_video_id = edx_video_id or payload['edx_video_id']
    stored_video = modulestore().get_item(self.video_usage_key)
    self.assertEqual(stored_video.edx_video_id, expected_edx_video_id)

    # Assert: the transcript content retrieved for the video equals the
    # reference SJSON content.
    stored_transcript = get_video_transcript_content(stored_video.edx_video_id, language_code=u'en')
    raw_content = stored_transcript['content']
    actual_sjson_content = json.loads(raw_content)
    expected_sjson_content = json.loads(SJSON_TRANSCRIPT_CONTENT)
    self.assertDictEqual(actual_sjson_content, expected_sjson_content)
def get(self, request, course, *args, **kwargs):
    """
    Return the transcript of a video block as a file attachment.

    `block_id` and `lang` are read from the URL kwargs. The transcript is
    first requested from the video descriptor. If that raises `ValueError`
    or `NotFoundError` and the VAL transcript feature is enabled for the
    course, the transcript is looked up with `get_video_transcript_content`
    and converted from sjson to srt.

    Raises:
        Http404: if no transcript is found for the block and language.
    """
    block_id, lang = kwargs['block_id'], kwargs['lang']

    usage_key = BlockUsageLocator(course.id, block_type='video', block_id=block_id)
    video_descriptor = modulestore().get_item(usage_key)
    feature_enabled = is_val_transcript_feature_enabled_for_course(usage_key.course_key)

    try:
        available = video_descriptor.get_transcripts_info(include_val_transcripts=feature_enabled)
        content, filename, mimetype = video_descriptor.get_transcript(available, lang=lang)
    except (ValueError, NotFoundError):
        # Fallback mechanism for edx-val transcripts; only consulted when
        # the feature is enabled for this course.
        val_transcript = (
            get_video_transcript_content(
                language_code=lang,
                edx_video_id=video_descriptor.edx_video_id,
                youtube_id_1_0=video_descriptor.youtube_id_1_0,
                html5_sources=video_descriptor.html5_sources,
            )
            if feature_enabled
            else None
        )
        if not val_transcript:
            raise Http404(u'Transcript not found for {}, lang: {}'.format(block_id, lang))

        # Keep only the stem of the stored file name and serve it as `.srt`.
        stem = os.path.splitext(os.path.basename(val_transcript['file_name']))[0]
        filename = '{base_name}.srt'.format(base_name=stem)
        content = Transcript.convert(val_transcript['content'], 'sjson', 'srt')
        mimetype = Transcript.mime_types['srt']
    except KeyError:
        raise Http404(u"Transcript not found for {}, lang: {}".format(block_id, lang))

    disposition = 'attachment; filename="{}"'.format(filename.encode('utf-8'))
    response = HttpResponse(content, content_type=mimetype)
    response['Content-Disposition'] = disposition
    return response
def get(self, request, course, *args, **kwargs):
    """
    Return the transcript of a video block as a file attachment.

    `block_id` and `lang` are read from the URL kwargs. The transcript is
    first requested from the video descriptor. If that raises `ValueError`
    or `NotFoundError` and the VAL transcript feature is enabled for the
    course, the transcript is fetched with `get_video_transcript_content`
    and passed through `convert_video_transcript` to obtain SRT output.

    Raises:
        Http404: if no transcript is found for the block and language.
    """
    block_id, lang = kwargs['block_id'], kwargs['lang']

    usage_key = BlockUsageLocator(course.id, block_type='video', block_id=block_id)
    video_descriptor = modulestore().get_item(usage_key)
    feature_enabled = is_val_transcript_feature_enabled_for_course(usage_key.course_key)

    try:
        available = video_descriptor.get_transcripts_info(include_val_transcripts=feature_enabled)
        content, filename, mimetype = video_descriptor.get_transcript(available, lang=lang)
    except (ValueError, NotFoundError):
        # Fallback mechanism for edx-val transcripts; only consulted when
        # the feature is enabled for this course.
        val_transcript = (
            get_video_transcript_content(video_descriptor.edx_video_id, lang)
            if feature_enabled
            else None
        )
        if not val_transcript:
            raise Http404(u'Transcript not found for {}, lang: {}'.format(block_id, lang))

        # Forward every key of the fetched transcript plus the requested
        # output format to the converter.
        converted = convert_video_transcript(**dict(val_transcript, output_format=Transcript.SRT))
        filename = converted['filename']
        content = converted['content']
        mimetype = Transcript.mime_types[Transcript.SRT]
    except KeyError:
        raise Http404(u"Transcript not found for {}, lang: {}".format(block_id, lang))

    disposition = 'attachment; filename="{}"'.format(filename.encode('utf-8'))
    response = HttpResponse(content, content_type=mimetype)
    response['Content-Disposition'] = disposition
    return response
def download_transcripts(request):
    """
    Passes to user requested transcripts file.

    The GET parameters `locator` and `subs_id` select the video item and the
    transcript. The transcript is looked up in the contentstore as
    `subs_<subs_id>.srt.sjson`; when `subs_id` is missing or the asset is not
    found, and the VAL transcript feature is enabled for the course, the
    English transcript is fetched through `get_video_transcript_content`.
    The sjson content is converted to srt and returned as an attachment.

    Raises:
        Http404: if `locator` is missing, the item cannot be found, the item
            is not a video, no transcript is available, or the conversion
            yields empty content.
    """
    locator = request.GET.get('locator')
    subs_id = request.GET.get('subs_id')
    if not locator:
        log.debug('GET data without "locator" property.')
        raise Http404

    try:
        item = _get_item(request, request.GET)
    except (InvalidKeyError, ItemNotFoundError):
        log.debug("Can't find item by locator.")
        raise Http404

    if item.category != 'video':
        log.debug('transcripts are supported only for "video" modules.')
        raise Http404

    try:
        # No `subs_id` means there is nothing to look up in the contentstore;
        # raising here routes straight to the VAL fallback below.
        if not subs_id:
            raise NotFoundError

        filename = subs_id
        content_location = StaticContent.compute_location(
            item.location.course_key,
            'subs_{filename}.srt.sjson'.format(filename=filename),
        )
        sjson_transcript = contentstore().find(content_location).data
    except NotFoundError:
        # Try searching in VAL for the transcript as a last resort
        transcript = None
        if is_val_transcript_feature_enabled_for_course(item.location.course_key):
            transcript = get_video_transcript_content(
                language_code=u'en',
                edx_video_id=item.edx_video_id,
                youtube_id_1_0=item.youtube_id_1_0,
                html5_sources=item.html5_sources,
            )

        if not transcript:
            raise Http404

        # Use the stored file's base name (without extension) for the download.
        filename = os.path.splitext(os.path.basename(transcript['file_name']))[0].encode('utf8')
        sjson_transcript = transcript['content']

    # convert sjson content into srt format.
    transcript_content = Transcript.convert(sjson_transcript, input_format='sjson', output_format='srt')
    if not transcript_content:
        raise Http404

    # Construct an HTTP response
    response = HttpResponse(transcript_content, content_type='application/x-subrip; charset=utf-8')
    response['Content-Disposition'] = 'attachment; filename="{filename}.srt"'.format(filename=filename)
    return response
def download_transcripts(request):
    """
    Passes to user requested transcripts file.

    The GET parameters `locator` and `subs_id` select the video item and the
    transcript. The transcript is looked up in the contentstore as
    `subs_<subs_id>.srt.sjson`; when `subs_id` is missing or the asset is not
    found, and the VAL transcript feature is enabled for the course, the
    English transcript is fetched through `get_video_transcript_content` and
    its input format is taken from the stored file name's extension. The
    content is converted to srt and returned as an attachment.

    Raises:
        Http404: if `locator` is missing, the item cannot be found, the item
            is not a video, no transcript is available, or the conversion
            yields empty content.
    """
    locator = request.GET.get('locator')
    subs_id = request.GET.get('subs_id')
    if not locator:
        log.debug('GET data without "locator" property.')
        raise Http404

    try:
        item = _get_item(request, request.GET)
    except (InvalidKeyError, ItemNotFoundError):
        log.debug("Can't find item by locator.")
        raise Http404

    if item.category != 'video':
        log.debug('transcripts are supported only for "video" modules.')
        raise Http404

    try:
        # No `subs_id` means there is nothing to look up in the contentstore;
        # raising here routes straight to the VAL fallback below.
        if not subs_id:
            raise NotFoundError

        filename = subs_id
        content_location = StaticContent.compute_location(
            item.location.course_key,
            'subs_{filename}.srt.sjson'.format(filename=filename),
        )
        input_format = Transcript.SJSON
        transcript_content = contentstore().find(content_location).data
    except NotFoundError:
        # Try searching in VAL for the transcript as a last resort
        transcript = None
        if is_val_transcript_feature_enabled_for_course(item.location.course_key):
            transcript = get_video_transcript_content(edx_video_id=item.edx_video_id, language_code=u'en')

        if not transcript:
            raise Http404

        # Split "<name>.<ext>": the name becomes the download file name and
        # the extension (without its leading dot) is the input format.
        name_and_extension = os.path.splitext(transcript['file_name'])
        filename, input_format = name_and_extension[0], name_and_extension[1][1:]
        transcript_content = transcript['content']

    # convert the transcript content from `input_format` into srt format.
    transcript_content = Transcript.convert(transcript_content, input_format=input_format, output_format=Transcript.SRT)
    if not transcript_content:
        raise Http404

    # Construct an HTTP response
    response = HttpResponse(transcript_content, content_type='application/x-subrip; charset=utf-8')
    response['Content-Disposition'] = 'attachment; filename="{filename}.srt"'.format(filename=filename)
    return response
def test_transcript_upload_with_non_existant_edx_video_id(self):
    """
    Uploading a transcript with an `edx_video_id` in the POST request that
    differs from the one set on the video descriptor must be rejected, and
    no transcript may be retrievable for that id afterwards.
    """
    unknown_video_id = '1111-2222-3333-4444'

    # Attempt the upload against the non-existent `edx_video_id`.
    response = self.upload_transcript(
        locator=self.video_usage_key,
        transcript_file=self.good_srt_file,
        edx_video_id=unknown_video_id
    )

    # The request is refused as a bad request with an explanatory message.
    self.assert_response(response, expected_status_code=400, expected_message='Invalid Video ID')

    # Nothing can be retrieved for the non-existent `edx_video_id`.
    stored_transcript = get_video_transcript_content(unknown_video_id, language_code=u'en')
    self.assertIsNone(stored_transcript)
def get(self, request, course, *args, **kwargs):
    """
    Serve a video block's transcript as a downloadable attachment.

    Reads `block_id` and `lang` from the URL kwargs and asks the video
    descriptor for the transcript. On `ValueError` / `NotFoundError`, and
    only when the VAL transcript feature is enabled for the course, falls
    back to `get_video_transcript_content` and converts the sjson content
    to srt.

    Raises:
        Http404: if no transcript is found for the block and language.
    """
    block_id = kwargs['block_id']
    lang = kwargs['lang']

    usage_key = BlockUsageLocator(course.id, block_type='video', block_id=block_id)
    descriptor = modulestore().get_item(usage_key)
    val_enabled = is_val_transcript_feature_enabled_for_course(usage_key.course_key)

    try:
        transcripts_info = descriptor.get_transcripts_info(include_val_transcripts=val_enabled)
        content, filename, mimetype = descriptor.get_transcript(transcripts_info, lang=lang)
    except (ValueError, NotFoundError):
        # Fallback mechanism for edx-val transcripts
        fallback = None
        if val_enabled:
            fallback = get_video_transcript_content(
                language_code=lang,
                edx_video_id=descriptor.edx_video_id,
                youtube_id_1_0=descriptor.youtube_id_1_0,
                html5_sources=descriptor.html5_sources,
            )
        if not fallback:
            raise Http404(u'Transcript not found for {}, lang: {}'.format(block_id, lang))

        # Drop any directory part and the extension of the stored file name.
        stored_name = os.path.basename(fallback['file_name'])
        name_without_ext = os.path.splitext(stored_name)[0]
        filename = '{base_name}.srt'.format(base_name=name_without_ext)
        content = Transcript.convert(fallback['content'], 'sjson', 'srt')
        mimetype = Transcript.mime_types['srt']
    except KeyError:
        raise Http404(u"Transcript not found for {}, lang: {}".format(block_id, lang))

    response = HttpResponse(content, content_type=mimetype)
    encoded_filename = filename.encode('utf-8')
    response['Content-Disposition'] = 'attachment; filename="{}"'.format(encoded_filename)
    return response
def get(self, request, course, *args, **kwargs):
    """
    Serve a video block's transcript as a downloadable attachment.

    Reads `block_id` and `lang` from the URL kwargs and asks the video
    descriptor for the transcript. On `ValueError` / `NotFoundError`, and
    only when the VAL transcript feature is enabled for the course, falls
    back to `get_video_transcript_content` and runs the result through
    `convert_video_transcript` with SRT as the output format.

    Raises:
        Http404: if no transcript is found for the block and language.
    """
    block_id = kwargs['block_id']
    lang = kwargs['lang']

    usage_key = BlockUsageLocator(course.id, block_type='video', block_id=block_id)
    descriptor = modulestore().get_item(usage_key)
    val_enabled = is_val_transcript_feature_enabled_for_course(usage_key.course_key)

    try:
        transcripts_info = descriptor.get_transcripts_info(include_val_transcripts=val_enabled)
        content, filename, mimetype = descriptor.get_transcript(transcripts_info, lang=lang)
    except (ValueError, NotFoundError):
        # Fallback mechanism for edx-val transcripts
        fallback = None
        if val_enabled:
            fallback = get_video_transcript_content(descriptor.edx_video_id, lang)
        if not fallback:
            raise Http404(u'Transcript not found for {}, lang: {}'.format(block_id, lang))

        # The fetched transcript's keys become the converter's keyword
        # arguments, with the output format added.
        conversion_kwargs = dict(fallback, output_format=Transcript.SRT)
        srt_transcript = convert_video_transcript(**conversion_kwargs)
        filename = srt_transcript['filename']
        content = srt_transcript['content']
        mimetype = Transcript.mime_types[Transcript.SRT]
    except KeyError:
        raise Http404(u"Transcript not found for {}, lang: {}".format(block_id, lang))

    response = HttpResponse(content, content_type=mimetype)
    encoded_filename = filename.encode('utf-8')
    response['Content-Disposition'] = 'attachment; filename="{}"'.format(encoded_filename)
    return response
def test_transcript_upload_success(self, edx_video_id, include_bom):
    """
    Exercise transcript file upload to the video component.

    The scenario is run for:
        1. External video component (`edx_video_id` is empty)
        2. VEDA produced video component (`edx_video_id` is the VAL video ID)
        3. Transcript content containing BOM character
    """
    # Arrange: store the parametrized `edx_video_id` on the component.
    self.item.edx_video_id = edx_video_id
    modulestore().update_item(self.item, self.user.id)

    # Act: upload either the BOM-prefixed or the plain SRT fixture.
    if include_bom:
        transcript_file = self.bom_srt_file
    else:
        transcript_file = self.good_srt_file
    response = self.upload_transcript(self.video_usage_key, transcript_file, '')

    # Assert: the endpoint reports success.
    self.assert_response(response, expected_status_code=200, expected_message='Success')

    # Assert: the component carries the preset id, or otherwise the
    # `edx_video_id` reported in the response.
    payload = json.loads(response.content.decode('utf-8'))
    expected_edx_video_id = edx_video_id or payload['edx_video_id']
    stored_video = modulestore().get_item(self.video_usage_key)
    self.assertEqual(stored_video.edx_video_id, expected_edx_video_id)

    # Assert: the retrieved transcript equals the "good" SRT fixture
    # converted to SJSON.
    stored_transcript = get_video_transcript_content(stored_video.edx_video_id, language_code=u'en')
    raw_content = stored_transcript['content'].decode('utf-8')
    actual_sjson_content = json.loads(raw_content)
    converted_fixture = Transcript.convert(
        self.contents['good'],
        input_format=Transcript.SRT,
        output_format=Transcript.SJSON,
    )
    expected_sjson_content = json.loads(converted_fixture)
    self.assertDictEqual(actual_sjson_content, expected_sjson_content)
def test_transcript_upload_success(self, edx_video_id, include_bom):
    """
    Exercise transcript file upload to the video component.

    The scenario is run for:
        1. External video component (`edx_video_id` is empty)
        2. VEDA produced video component (`edx_video_id` is the VAL video ID)
        3. Transcript content containing BOM character
    """
    # Arrange: store the parametrized `edx_video_id` on the component.
    self.item.edx_video_id = edx_video_id
    modulestore().update_item(self.item, self.user.id)

    # Act: upload either the BOM-prefixed or the plain SRT fixture.
    if include_bom:
        transcript_file = self.bom_srt_file
    else:
        transcript_file = self.good_srt_file
    response = self.upload_transcript(self.video_usage_key, transcript_file, '')

    # Assert: the endpoint reports success.
    self.assert_response(response, expected_status_code=200, expected_message='Success')

    # Assert: the component carries the preset id, or otherwise the
    # `edx_video_id` reported in the response.
    payload = json.loads(response.content)
    expected_edx_video_id = edx_video_id or payload['edx_video_id']
    stored_video = modulestore().get_item(self.video_usage_key)
    self.assertEqual(stored_video.edx_video_id, expected_edx_video_id)

    # Assert: the retrieved transcript equals the "good" SRT fixture
    # converted to SJSON.
    stored_transcript = get_video_transcript_content(stored_video.edx_video_id, language_code=u'en')
    raw_content = stored_transcript['content']
    actual_sjson_content = json.loads(raw_content)
    converted_fixture = Transcript.convert(
        self.contents['good'],
        input_format=Transcript.SRT,
        output_format=Transcript.SJSON,
    )
    expected_sjson_content = json.loads(converted_fixture)
    self.assertDictEqual(actual_sjson_content, expected_sjson_content)
def check_transcripts(request):
    """
    Check state of transcripts availability.

    request.GET['data'] has key `videos`, which can contain any of the following::

        [
            {u'type': u'youtube', u'video': u'OEoXaMPEzfM', u'mode': u'youtube'},
            {u'type': u'html5',   u'video': u'video1',      u'mode': u'mp4'}
            {u'type': u'html5',   u'video': u'video2',      u'mode': u'webm'}
        ]
        `type` is youtube or html5
        `video` is html5 or youtube video_id
        `mode` is youtube, mp4 or webm

    Returns a JSON response built from the transcripts_presence dict::

        html5_local: list of html5 ids, if subtitles exist locally for them;
        html5_equal: bool, result of comparing the two locally found html5
            transcripts (stays False unless exactly two have been collected);
        is_youtube_mode: bool, if we have youtube_id, and as youtube mode is of higher priority, reflect this with flag;
        youtube_local: bool, if youtube transcripts exist locally;
        youtube_server: bool, if youtube transcripts exist on server;
        youtube_diff: bool, if youtube transcripts exist on youtube server, and are different from local youtube ones
            (defaults to True and is only cleared when both copies compare equal);
        current_item_subs: string, value of item.sub field (None when no local transcript exists for it);
        status: string, 'Error' or 'Success';
        subs: string, new value of item.sub field, that should be set in module;
        command: string, action to front-end what to do and what to show to user.

    If request validation fails, the result of `error_response` is returned
    with the default presence dict (status 'Error') and the exception message.
    """
    # Defaults; flags are switched on below as transcripts are discovered.
    transcripts_presence = {
        'html5_local': [],
        'html5_equal': False,
        'is_youtube_mode': False,
        'youtube_local': False,
        'youtube_server': False,
        'youtube_diff': True,
        'current_item_subs': None,
        'status': 'Error',
    }
    try:
        __, videos, item = _validate_transcripts_data(request)
    except TranscriptsRequestValidationException as e:
        return error_response(transcripts_presence, e.message)

    transcripts_presence['status'] = 'Success'

    # Is there a locally stored transcript for the item's current `sub` value?
    filename = 'subs_{0}.srt.sjson'.format(item.sub)
    content_location = StaticContent.compute_location(item.location.course_key, filename)
    try:
        local_transcripts = contentstore().find(content_location).data
        transcripts_presence['current_item_subs'] = item.sub
    except NotFoundError:
        pass

    # Check for youtube transcripts presence
    youtube_id = videos.get('youtube', None)
    if youtube_id:
        transcripts_presence['is_youtube_mode'] = True

        # youtube local
        filename = 'subs_{0}.srt.sjson'.format(youtube_id)
        content_location = StaticContent.compute_location(item.location.course_key, filename)
        try:
            # Rebinds `local_transcripts` to the youtube-id transcript; this is
            # the value compared against the server copy further down.
            local_transcripts = contentstore().find(content_location).data
            transcripts_presence['youtube_local'] = True
        except NotFoundError:
            log.debug("Can't find transcripts in storage for youtube id: %s", youtube_id)

        # youtube server
        # Work on a deep copy so the per-request params are not written into
        # the shared settings dict.
        youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
        youtube_text_api['params']['v'] = youtube_id
        youtube_transcript_name = youtube_video_transcript_name(youtube_text_api)
        if youtube_transcript_name:
            youtube_text_api['params']['name'] = youtube_transcript_name
        youtube_response = requests.get('http://' + youtube_text_api['url'], params=youtube_text_api['params'])

        # A 200 response with a non-empty body counts as "exists on server".
        if youtube_response.status_code == 200 and youtube_response.text:
            transcripts_presence['youtube_server'] = True
        # check youtube local and server transcripts for equality
        if transcripts_presence['youtube_server'] and transcripts_presence['youtube_local']:
            try:
                youtube_server_subs = get_transcripts_from_youtube(
                    youtube_id,
                    settings,
                    item.runtime.service(item, "i18n")
                )
                if json.loads(local_transcripts) == youtube_server_subs:  # check transcripts for equality
                    transcripts_presence['youtube_diff'] = False
            except GetTranscriptsFromYouTubeException:
                # Fetching the server copy failed; leave `youtube_diff` at its default.
                pass

    # Check for html5 local transcripts presence
    html5_subs = []
    for html5_id in videos['html5']:
        filename = 'subs_{0}.srt.sjson'.format(html5_id)
        content_location = StaticContent.compute_location(item.location.course_key, filename)
        try:
            html5_subs.append(contentstore().find(content_location).data)
            transcripts_presence['html5_local'].append(html5_id)
        except NotFoundError:
            log.debug("Can't find transcripts in storage for non-youtube video_id: %s", html5_id)
        if len(html5_subs) == 2:  # check html5 transcripts for equality
            transcripts_presence['html5_equal'] = json.loads(html5_subs[0]) == json.loads(html5_subs[1])

    # NOTE(review): `_transcripts_logic` is defined outside this block; it
    # presumably maps the collected flags to a front-end command - confirm there.
    command, subs_to_use = _transcripts_logic(transcripts_presence, videos)
    if command == 'not_found':
        # Try searching in VAL for the transcript as a last resort
        if is_val_transcript_feature_enabled_for_course(item.location.course_key):
            video_transcript = get_video_transcript_content(
                language_code=u'en',
                edx_video_id=item.edx_video_id,
                youtube_id_1_0=item.youtube_id_1_0,
                html5_sources=item.html5_sources,
            )
            command = 'found' if video_transcript else command

    transcripts_presence.update({
        'command': command,
        'subs': subs_to_use,
    })
    return JsonResponse(transcripts_presence)
def check_transcripts(request):
    """
    Check state of transcripts availability.

    request.GET['data'] has key `videos`, which can contain any of the following::

        [
            {u'type': u'youtube', u'video': u'OEoXaMPEzfM', u'mode': u'youtube'},
            {u'type': u'html5',   u'video': u'video1',      u'mode': u'mp4'}
            {u'type': u'html5',   u'video': u'video2',      u'mode': u'webm'}
        ]
        `type` is youtube or html5
        `video` is html5 or youtube video_id
        `mode` is youtube, mp4 or webm

    Returns a JSON response built from the transcripts_presence dict::

        html5_local: list of html5 ids, if subtitles exist locally for them;
        html5_equal: bool, result of comparing the two locally found html5
            transcripts (stays False unless exactly two have been collected);
        is_youtube_mode: bool, if we have youtube_id, and as youtube mode is of higher priority, reflect this with flag;
        youtube_local: bool, if youtube transcripts exist locally;
        youtube_server: bool, if youtube transcripts exist on server;
        youtube_diff: bool, if youtube transcripts exist on youtube server, and are different from local youtube ones
            (defaults to True and is only cleared when both copies compare equal);
        current_item_subs: string, value of item.sub field (None when no local transcript exists for it);
        status: string, 'Error' or 'Success';
        subs: string, new value of item.sub field, that should be set in module;
        command: string, action to front-end what to do and what to show to user.

    If request validation fails, the result of `error_response` is returned
    with the default presence dict (status 'Error') and the exception message.
    """
    # Defaults; flags are switched on below as transcripts are discovered.
    transcripts_presence = {
        'html5_local': [],
        'html5_equal': False,
        'is_youtube_mode': False,
        'youtube_local': False,
        'youtube_server': False,
        'youtube_diff': True,
        'current_item_subs': None,
        'status': 'Error',
    }
    try:
        __, videos, item = _validate_transcripts_data(request)
    except TranscriptsRequestValidationException as e:
        return error_response(transcripts_presence, e.message)

    transcripts_presence['status'] = 'Success'

    # Is there a locally stored transcript for the item's current `sub` value?
    filename = 'subs_{0}.srt.sjson'.format(item.sub)
    content_location = StaticContent.compute_location(item.location.course_key, filename)
    try:
        local_transcripts = contentstore().find(content_location).data
        transcripts_presence['current_item_subs'] = item.sub
    except NotFoundError:
        pass

    # Check for youtube transcripts presence
    youtube_id = videos.get('youtube', None)
    if youtube_id:
        transcripts_presence['is_youtube_mode'] = True

        # youtube local
        filename = 'subs_{0}.srt.sjson'.format(youtube_id)
        content_location = StaticContent.compute_location(item.location.course_key, filename)
        try:
            # Rebinds `local_transcripts` to the youtube-id transcript; this is
            # the value compared against the server copy further down.
            local_transcripts = contentstore().find(content_location).data
            transcripts_presence['youtube_local'] = True
        except NotFoundError:
            log.debug("Can't find transcripts in storage for youtube id: %s", youtube_id)

        # youtube server
        # Work on a deep copy so the per-request params are not written into
        # the shared settings dict.
        youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
        youtube_text_api['params']['v'] = youtube_id
        youtube_transcript_name = youtube_video_transcript_name(youtube_text_api)
        if youtube_transcript_name:
            youtube_text_api['params']['name'] = youtube_transcript_name
        youtube_response = requests.get('http://' + youtube_text_api['url'], params=youtube_text_api['params'])

        # A 200 response with a non-empty body counts as "exists on server".
        if youtube_response.status_code == 200 and youtube_response.text:
            transcripts_presence['youtube_server'] = True
        # check youtube local and server transcripts for equality
        if transcripts_presence['youtube_server'] and transcripts_presence['youtube_local']:
            try:
                youtube_server_subs = get_transcripts_from_youtube(
                    youtube_id,
                    settings,
                    item.runtime.service(item, "i18n")
                )
                if json.loads(local_transcripts) == youtube_server_subs:  # check transcripts for equality
                    transcripts_presence['youtube_diff'] = False
            except GetTranscriptsFromYouTubeException:
                # Fetching the server copy failed; leave `youtube_diff` at its default.
                pass

    # Check for html5 local transcripts presence
    html5_subs = []
    for html5_id in videos['html5']:
        filename = 'subs_{0}.srt.sjson'.format(html5_id)
        content_location = StaticContent.compute_location(item.location.course_key, filename)
        try:
            html5_subs.append(contentstore().find(content_location).data)
            transcripts_presence['html5_local'].append(html5_id)
        except NotFoundError:
            log.debug("Can't find transcripts in storage for non-youtube video_id: %s", html5_id)
        if len(html5_subs) == 2:  # check html5 transcripts for equality
            transcripts_presence['html5_equal'] = json.loads(html5_subs[0]) == json.loads(html5_subs[1])

    # NOTE(review): `_transcripts_logic` is defined outside this block; it
    # presumably maps the collected flags to a front-end command - confirm there.
    command, subs_to_use = _transcripts_logic(transcripts_presence, videos)
    if command == 'not_found':
        # Try searching in VAL for the transcript as a last resort
        if is_val_transcript_feature_enabled_for_course(item.location.course_key):
            video_transcript = get_video_transcript_content(
                language_code=u'en',
                edx_video_id=item.edx_video_id,
                youtube_id_1_0=item.youtube_id_1_0,
                html5_sources=item.html5_sources,
            )
            command = 'found' if video_transcript else command

    transcripts_presence.update({
        'command': command,
        'subs': subs_to_use,
    })
    return JsonResponse(transcripts_presence)