Exemplo n.º 1
0
def validate_transcript_upload_data(request):
    """
    Check the data submitted with a video transcript upload.

    Arguments:
        request: A WSGI request; its POST data and FILES are inspected.

    Returns:
        A two-tuple of (error, validated data). Validated data is an empty
        dict whenever an error is reported; otherwise it holds the `video`,
        the `edx_video_id` to use and the uploaded `transcript_file`.
    """
    post_data = request.POST
    uploads = request.FILES
    locator = post_data.get('locator')
    submitted_video_id = post_data.get('edx_video_id')

    # Request-level checks: only the first failing one is reported.
    if not locator:
        return _(u'Video locator is required.'), {}
    if 'transcript-file' not in uploads:
        return _(u'A transcript file is required.'), {}

    extension = os.path.splitext(uploads['transcript-file'].name)[1][1:]
    if extension != Transcript.SRT:
        return _(u'This transcript file type is not supported.'), {}
    if 'edx_video_id' not in post_data:
        return _(u'Video ID is required.'), {}

    # The request looks fine; now make sure the locator points at a usable video module.
    error, video = validate_video_module(request, locator)
    if error:
        return error, {}

    # Prefer the submitted video ID and fall back to the one stored on the video module.
    return error, {
        'video': video,
        'edx_video_id': clean_video_id(submitted_video_id) or clean_video_id(video.edx_video_id),
        'transcript_file': uploads['transcript-file'],
    }
def validate_transcript_upload_data(request):
    """
    Check the data submitted with a video transcript upload.

    Arguments:
        request: A WSGI request; its POST data and FILES are inspected.

    Returns:
        A two-tuple of (error, validated data). Validated data is an empty
        dict whenever an error is reported; otherwise it holds the `video`,
        the `edx_video_id` to use and the uploaded `transcript_file`.
    """
    form_fields, form_files = request.POST, request.FILES
    locator = form_fields.get('locator')
    requested_video_id = form_fields.get('edx_video_id')

    # Request-level checks: only the first failing one is reported.
    if not locator:
        return _(u'Video locator is required.'), {}
    if 'transcript-file' not in form_files:
        return _(u'A transcript file is required.'), {}

    file_extension = os.path.splitext(form_files['transcript-file'].name)[1][1:]
    if file_extension != Transcript.SRT:
        return _(u'This transcript file type is not supported.'), {}
    if 'edx_video_id' not in form_fields:
        return _(u'Video ID is required.'), {}

    # The request looks fine; now make sure the locator points at a usable video module.
    error, video = validate_video_module(request, locator)
    if error:
        return error, {}

    # Prefer the submitted video ID and fall back to the one stored on the video module.
    chosen_video_id = clean_video_id(requested_video_id) or clean_video_id(video.edx_video_id)
    return error, {
        'video': video,
        'edx_video_id': chosen_video_id,
        'transcript_file': form_files['transcript-file'],
    }
Exemplo n.º 3
0
def save_transcript_to_storage(edx_video_id,
                               language_code,
                               transcript_content,
                               file_format=Transcript.SJSON,
                               force_update=False):
    """
    Store the given transcript content for a video in django storage.

    With `force_update` the transcript is created or updated; otherwise it is
    only created. The value returned by the underlying call is handed back to
    the caller. A `ValCannotCreateError` is logged and then re-raised.
    """
    try:
        video_id = clean_video_id(edx_video_id)
        if force_update:
            outcome = create_or_update_video_transcript(
                video_id,
                language_code,
                {'file_format': file_format},
                ContentFile(transcript_content),
            )
            LOGGER.info(
                "[Transcript migration] save_transcript_to_storage %s for %s with create_or_update method",
                bool(outcome),
                video_id,
            )
            return outcome

        outcome = create_video_transcript(
            video_id,
            language_code,
            file_format,
            ContentFile(transcript_content),
        )
        LOGGER.info(
            "[Transcript migration] save_transcript_to_storage %s for %s with create method",
            outcome,
            video_id,
        )
        return outcome
    except ValCannotCreateError as failure:
        LOGGER.exception(
            "[Transcript migration] save_transcript_to_storage_failed: %s",
            failure,
        )
        raise
Exemplo n.º 4
0
 def test_get_transcript_val_exceptions(self, exception_to_raise, mock_Transcript):
     """
     Check that `get_transcript_from_val` raises `NotFoundError` when the mocked
     `Transcript.convert` raises the given exception.
     """
     mock_Transcript.convert.side_effect = exception_to_raise
     default_language = self.video.get_default_transcript_language(
         self.video.get_transcripts_info()
     )
     cleaned_video_id = transcripts_utils.clean_video_id(self.video.edx_video_id)
     requested_format = transcripts_utils.Transcript.SRT
     with self.assertRaises(NotFoundError):
         transcripts_utils.get_transcript_from_val(
             cleaned_video_id,
             lang=default_language,
             output_format=requested_format,
         )
Exemplo n.º 5
0
def async_migrate_transcript(self, course_key, **kwargs):
    #pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.

    Arguments:
        course_key: serialized course key of the course to migrate.
        **kwargs: must contain `force_update`; every keyword argument is also
            forwarded to each `async_migrate_transcript_subtask` signature.

    Returns:
        A 'Failed: ...' message when the course cannot be found, otherwise the
        value obtained from the chord of subtasks with `task_status_callback`
        as its callback.
    """
    try:
        if not modulestore().get_course(CourseKey.from_string(course_key)):
            # `text_type` (already used below) replaces the Python 2 only `unicode` builtin.
            raise KeyError(u'Invalid course key: ' + text_type(course_key))
    except KeyError as exc:
        LOGGER.exception('[Transcript migration] Exception: %r', text_type(exc))
        return 'Failed: course {course_key} with exception {exception}'.format(
            course_key=course_key,
            exception=text_type(exc)
        )
    force_update = kwargs['force_update']
    sub_tasks = []

    all_videos = get_videos_from_store(CourseKey.from_string(course_key))
    LOGGER.info(
        "[Transcript migration] process for course %s started. Migrating %s videos",
        course_key,
        len(all_videos)
    )
    for video in all_videos:
        # NOTE: no copy is made, so adding 'en' below also changes the dict
        # object that `video.transcripts` returned.
        all_lang_transcripts = video.transcripts
        english_transcript = video.sub
        if english_transcript:
            all_lang_transcripts.update({'en': video.sub})
        # Only the language codes are needed here, not the transcript values.
        for lang in all_lang_transcripts:
            transcript_already_present = is_transcript_available(
                clean_video_id(video.edx_video_id),
                lang
            )
            # Existing transcripts are only re-migrated when an update is forced.
            if transcript_already_present and force_update:
                sub_tasks.append(async_migrate_transcript_subtask.s(
                    video, lang, True, **kwargs
                ))
            elif not transcript_already_present:
                sub_tasks.append(async_migrate_transcript_subtask.s(
                    video, lang, False, **kwargs
                ))
    LOGGER.info("[Transcript migration] Migrating %s transcripts", len(sub_tasks))
    callback = task_status_callback.s()
    status = chord(sub_tasks)(callback)
    # Fetch the chord result once and reuse it for both the log line and the return value.
    results = status.get()
    LOGGER.info(
        "[Transcript migration] process for course %s ended. Processed %s transcripts",
        course_key,
        len(results)
    )
    return results
 def test_get_transcript_val_exceptions(self, exception_to_raise, mock_Transcript):
     """
     Check that `get_transcript_from_val` raises `NotFoundError` when the mocked
     `Transcript.convert` raises the given exception.
     """
     mock_Transcript.convert.side_effect = exception_to_raise
     info = self.video.get_transcripts_info()
     language = self.video.get_default_transcript_language(info)
     val_kwargs = {
         'lang': language,
         'output_format': transcripts_utils.Transcript.SRT,
     }
     video_id = transcripts_utils.clean_video_id(self.video.edx_video_id)
     with self.assertRaises(NotFoundError):
         transcripts_utils.get_transcript_from_val(video_id, **val_kwargs)
Exemplo n.º 7
0
def link_video_to_component(video_component, user):
    """
    Make sure the video component is linked to a VAL video.

    Arguments:
        video_component: video descriptor item.
        user: A requesting user.

    Returns:
        The cleaned Video ID already present on the component, or the ID of
        the external video that was created and saved on it.
    """
    linked_video_id = clean_video_id(video_component.edx_video_id)
    if linked_video_id:
        return linked_video_id

    # Nothing usable on the component yet: create an external video and persist the link.
    linked_video_id = create_external_video(display_name=u'external video')
    video_component.edx_video_id = linked_video_id
    video_component.save_with_metadata(user)
    return linked_video_id
Exemplo n.º 8
0
def link_video_to_component(video_component, user):
    """
    Make sure the video component is linked to a VAL video.

    Arguments:
        video_component: video descriptor item.
        user: A requesting user.

    Returns:
        The cleaned Video ID already present on the component, or the ID of
        the external video that was created and saved on it.
    """
    existing_id = clean_video_id(video_component.edx_video_id)
    if existing_id:
        # Already linked, nothing to create or save.
        return existing_id

    new_id = create_external_video(display_name=u'external video')
    video_component.edx_video_id = new_id
    video_component.save_with_metadata(user)
    return new_id
Exemplo n.º 9
0
def async_migrate_transcript(self, course_key, **kwargs):
    #pylint: disable=unused-argument
    """
    Migrates the transcripts of all videos in a course as a new celery task.

    Arguments:
        course_key: serialized course key of the course to migrate.
        **kwargs: must contain `force_update`; every keyword argument is also
            forwarded to each `async_migrate_transcript_subtask` signature.

    Returns:
        A 'Failed: ...' message when the course cannot be found, otherwise the
        value obtained from the chord of subtasks with `task_status_callback`
        as its callback.
    """
    try:
        if not modulestore().get_course(CourseKey.from_string(course_key)):
            # `text_type` (already used below) replaces the Python 2 only `unicode` builtin.
            raise KeyError(u'Invalid course key: ' + text_type(course_key))
    except KeyError as exc:
        LOGGER.exception('[Transcript migration] Exception: %r',
                         text_type(exc))
        return 'Failed: course {course_key} with exception {exception}'.format(
            course_key=course_key, exception=text_type(exc))
    force_update = kwargs['force_update']
    sub_tasks = []

    all_videos = get_videos_from_store(CourseKey.from_string(course_key))
    LOGGER.info(
        "[Transcript migration] process for course %s started. Migrating %s videos",
        course_key, len(all_videos))
    for video in all_videos:
        # NOTE: no copy is made, so adding 'en' below also changes the dict
        # object that `video.transcripts` returned.
        all_lang_transcripts = video.transcripts
        english_transcript = video.sub
        if english_transcript:
            all_lang_transcripts.update({'en': video.sub})
        # Only the language codes are needed here, not the transcript values.
        for lang in all_lang_transcripts:
            transcript_already_present = is_transcript_available(
                clean_video_id(video.edx_video_id), lang)
            # Existing transcripts are only re-migrated when an update is forced.
            if transcript_already_present and force_update:
                sub_tasks.append(
                    async_migrate_transcript_subtask.s(video, lang, True,
                                                       **kwargs))
            elif not transcript_already_present:
                sub_tasks.append(
                    async_migrate_transcript_subtask.s(video, lang, False,
                                                       **kwargs))
    LOGGER.info("[Transcript migration] Migrating %s transcripts",
                len(sub_tasks))
    callback = task_status_callback.s()
    status = chord(sub_tasks)(callback)
    # Fetch the chord result once and reuse it for both the log line and the return value.
    results = status.get()
    LOGGER.info(
        "[Transcript migration] process for course %s ended. Processed %s transcripts",
        course_key, len(results))
    return results
Exemplo n.º 10
0
def async_migrate_transcript_subtask(self, *args, **kwargs):
    #pylint: disable=unused-argument
    """
    Migrates a transcript of a given video in a course as a new celery task.

    Arguments:
        *args: exactly three positional values, in order:
            (video, language_code, force_update).
        **kwargs: must contain `commit`. Unless it is exactly `True`, nothing
            is migrated and only an informational message is returned.

    Returns:
        A status message string: '... will be migrated' without commit,
        otherwise 'Success: ...' or 'Failed: ...'.
    """
    video, language_code, force_update = args
    commit = kwargs['commit']
    result = None
    if commit is not True:
        return 'Language {0} transcript of video {1} will be migrated'.format(
            language_code,
            video.edx_video_id
        )
    LOGGER.info("[Transcript migration] process for %s transcript started", language_code)
    try:
        transcript_info = video.get_transcripts_info()
        transcript_content, _, _ = get_transcript_from_contentstore(
            video, language_code, Transcript.SJSON, transcript_info)
        edx_video_id = clean_video_id(video.edx_video_id)

        if not edx_video_id:
            # Keep the newly created ID in `edx_video_id` so the transcript is
            # stored against it below. Without this the save step was skipped
            # and the task reported 'Failed' for every video lacking an ID.
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id
            video.save_with_metadata(user=User.objects.get(username='******'))
        if edx_video_id:
            result = save_transcript_to_storage(
                edx_video_id,
                language_code,
                transcript_content,
                Transcript.SJSON,
                force_update
            )
    except (NotFoundError, TranscriptsGenerationException, ValCannotCreateError) as exc:
        LOGGER.exception('[Transcript migration] Exception: %r', text_type(exc))
        return 'Failed: language {language} of video {video} with exception {exception}'.format(
            language=language_code,
            video=video.edx_video_id,
            exception=text_type(exc)
        )
    LOGGER.info("[Transcript migration] process for %s transcript ended", language_code)
    # `result` stays None when nothing was saved.
    if result is not None:
        return 'Success: language {0} of video {1}'.format(language_code, video.edx_video_id)
    else:
        return 'Failed: language {0} of video {1}'.format(language_code, video.edx_video_id)
Exemplo n.º 11
0
def async_migrate_transcript_subtask(self, *args, **kwargs):
    #pylint: disable=unused-argument
    """
    Migrates a transcript of a given video in a course as a new celery task.

    Arguments:
        *args: exactly three positional values, in order:
            (video, language_code, force_update).
        **kwargs: must contain `commit`. Unless it is exactly `True`, nothing
            is migrated and only an informational message is returned.

    Returns:
        A status message string: '... will be migrated' without commit,
        otherwise 'Success: ...' or 'Failed: ...'.
    """
    video, language_code, force_update = args
    commit = kwargs['commit']
    result = None
    if commit is not True:
        return 'Language {0} transcript of video {1} will be migrated'.format(
            language_code, video.edx_video_id)
    LOGGER.info("[Transcript migration] process for %s transcript started",
                language_code)
    try:
        transcript_info = video.get_transcripts_info()
        transcript_content, _, _ = get_transcript_from_contentstore(
            video, language_code, Transcript.SJSON, transcript_info)
        edx_video_id = clean_video_id(video.edx_video_id)

        if not edx_video_id:
            # Keep the newly created ID in `edx_video_id` so the transcript is
            # stored against it below. Without this the save step was skipped
            # and the task reported 'Failed' for every video lacking an ID.
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id
            video.save_with_metadata(user=User.objects.get(username='******'))
        if edx_video_id:
            result = save_transcript_to_storage(edx_video_id, language_code,
                                                transcript_content,
                                                Transcript.SJSON, force_update)
    except (NotFoundError, TranscriptsGenerationException,
            ValCannotCreateError) as exc:
        LOGGER.exception('[Transcript migration] Exception: %r',
                         text_type(exc))
        return 'Failed: language {language} of video {video} with exception {exception}'.format(
            language=language_code,
            video=video.edx_video_id,
            exception=text_type(exc))
    LOGGER.info("[Transcript migration] process for %s transcript ended",
                language_code)
    # `result` stays None when nothing was saved.
    if result is not None:
        return 'Success: language {0} of video {1}'.format(
            language_code, video.edx_video_id)
    else:
        return 'Failed: language {0} of video {1}'.format(
            language_code, video.edx_video_id)
Exemplo n.º 12
0
def _validate_transcripts_data(request):
    """
    Validates, that request contains all proper data for transcripts processing.

    Returns tuple of 3 elements::

        data: dict, loaded json from request,
        videos: parsed `data` to useful format,
        item:  video item from storage

    `videos` always has the keys `youtube` (video id string, '' if absent) and
    `html5` (dict mapping video id to mode); `edx_video_id` is added only when
    the submitted value is non-empty after `clean_video_id`.

    Raises `TranscriptsRequestValidationException` if validation is unsuccessful
    or `PermissionDenied` if user has no access (not raised here directly;
    presumably it propagates from `_get_item` -- confirm).
    """
    data = json.loads(request.GET.get('data', '{}'))
    if not data:
        raise TranscriptsRequestValidationException(
            _('Incoming video data is empty.'))

    try:
        item = _get_item(request, data)
    except (InvalidKeyError, ItemNotFoundError):
        raise TranscriptsRequestValidationException(
            _("Can't find item by locator."))

    if item.category != 'video':
        raise TranscriptsRequestValidationException(
            _('Transcripts are supported only for "video" modules.'))

    # parse data from request.GET['data']['videos'] to useful format
    videos = {'youtube': '', 'html5': {}}
    # A request without a `videos` entry is treated as having no videos
    # instead of failing with a TypeError on iterating None.
    for video_data in data.get('videos') or []:
        video_type = video_data['type']
        video_id = video_data['video']
        if video_type == 'youtube':
            videos['youtube'] = video_id
        elif video_type == 'edx_video_id':
            if clean_video_id(video_id):
                videos['edx_video_id'] = video_id
        elif video_id not in videos['html5']:
            # Do not add the same html5 video twice: the first entry for a
            # video id is kept. The earlier check looked up the literal key
            # 'video', so it never detected duplicates.
            videos['html5'][video_id] = video_data['mode']

    return data, videos, item
Exemplo n.º 13
0
def _validate_transcripts_data(request):
    """
    Validates, that request contains all proper data for transcripts processing.

    Returns tuple of 3 elements::

        data: dict, loaded json from request,
        videos: parsed `data` to useful format,
        item:  video item from storage

    `videos` always has the keys `youtube` (video id string, '' if absent) and
    `html5` (dict mapping video id to mode); `edx_video_id` is added only when
    the submitted value is non-empty after `clean_video_id`.

    Raises `TranscriptsRequestValidationException` if validation is unsuccessful
    or `PermissionDenied` if user has no access (not raised here directly;
    presumably it propagates from `_get_item` -- confirm).
    """
    data = json.loads(request.GET.get('data', '{}'))
    if not data:
        raise TranscriptsRequestValidationException(_('Incoming video data is empty.'))

    try:
        item = _get_item(request, data)
    except (InvalidKeyError, ItemNotFoundError):
        raise TranscriptsRequestValidationException(_("Can't find item by locator."))

    if item.category != 'video':
        raise TranscriptsRequestValidationException(_('Transcripts are supported only for "video" modules.'))

    # parse data from request.GET['data']['videos'] to useful format
    videos = {'youtube': '', 'html5': {}}
    # A request without a `videos` entry is treated as having no videos
    # instead of failing with a TypeError on iterating None.
    for video_data in data.get('videos') or []:
        video_type = video_data['type']
        video_id = video_data['video']
        if video_type == 'youtube':
            videos['youtube'] = video_id
        elif video_type == 'edx_video_id':
            if clean_video_id(video_id):
                videos['edx_video_id'] = video_id
        elif video_id not in videos['html5']:
            # Do not add the same html5 video twice: the first entry for a
            # video id is kept. The earlier check looked up the literal key
            # 'video', so it never detected duplicates.
            videos['html5'][video_id] = video_data['mode']

    return data, videos, item
Exemplo n.º 14
0
def save_transcript_to_storage(
        edx_video_id,
        language_code,
        transcript_content,
        file_format=Transcript.SJSON,
        force_update=False
):
    """
    Store the given transcript content for a video in django storage.

    With `force_update` the transcript is created or updated; otherwise it is
    only created. The value returned by the underlying call is handed back to
    the caller. A `ValCannotCreateError` is logged and then re-raised.
    """
    try:
        cleaned_id = clean_video_id(edx_video_id)
        if not force_update:
            stored = create_video_transcript(
                cleaned_id, language_code, file_format, ContentFile(transcript_content)
            )
            LOGGER.info(
                "[Transcript migration] save_transcript_to_storage %s for %s with create method",
                stored, cleaned_id
            )
            return stored

        stored = create_or_update_video_transcript(
            cleaned_id, language_code, {'file_format': file_format}, ContentFile(transcript_content)
        )
        # Only the truthiness of the result is logged for the create-or-update path.
        LOGGER.info(
            "[Transcript migration] save_transcript_to_storage %s for %s with create_or_update method",
            bool(stored), cleaned_id
        )
        return stored
    except ValCannotCreateError as create_error:
        LOGGER.exception("[Transcript migration] save_transcript_to_storage_failed: %s", create_error)
        raise
Exemplo n.º 15
0
def check_transcripts(request):
    """
    Check state of transcripts availability.

    request.GET['data'] has key `videos`, which can contain any of the following::

        [
            {u'type': u'youtube', u'video': u'OEoXaMPEzfM', u'mode': u'youtube'},
            {u'type': u'html5',    u'video': u'video1',             u'mode': u'mp4'},
            {u'type': u'html5',    u'video': u'video2',             u'mode': u'webm'}
        ]
        `type` is youtube or html5
        `video` is html5 or youtube video_id
        `mode` is youtube, mp4 or webm

    If the parsed videos contain an `edx_video_id`, an English transcript is
    looked up in VAL first; when it is found the command is 'found' and none of
    the contentstore/YouTube checks below are performed.

    Returns a JSON response built from the transcripts_presence dict::

        html5_local: list of html5 ids, if subtitles exist locally for them;
        html5_equal: bool, whether the first two html5 transcripts found locally have equal JSON content
            (stays False when fewer than two are found);
        is_youtube_mode: bool, if we have youtube_id, and as youtube mode is of higher priority, reflect this with flag;
        youtube_local: bool, if youtube transcripts exist locally;
        youtube_server: bool, if youtube transcripts exist on server;
        youtube_diff: bool, if youtube transcripts exist on youtube server, and are different from local youtube ones;
        current_item_subs: string, value of item.sub field;
        status: string, 'Error' or 'Success';
        subs: string, new value of item.sub field, that should be set in module
            (NOTE(review): this function does not set it itself -- confirm where it comes from);
        command: string, action to front-end what to do and what to show to user.
    """
    # Defaults describe the "nothing found" state; 'status' is flipped to
    # 'Success' once the request has been validated.
    transcripts_presence = {
        'html5_local': [],
        'html5_equal': False,
        'is_youtube_mode': False,
        'youtube_local': False,
        'youtube_server': False,
        'youtube_diff': True,
        'current_item_subs': None,
        'status': 'Error',
    }

    try:
        __, videos, item = _validate_transcripts_data(request)
    except TranscriptsRequestValidationException as e:
        return error_response(transcripts_presence, text_type(e))

    transcripts_presence['status'] = 'Success'

    try:
        # A transcript available in VAL takes precedence over everything else.
        edx_video_id = clean_video_id(videos.get('edx_video_id'))
        get_transcript_from_val(edx_video_id=edx_video_id, lang=u'en')
        command = 'found'
    except NotFoundError:
        # Nothing in VAL: inspect the contentstore (and YouTube) instead.
        filename = 'subs_{0}.srt.sjson'.format(item.sub)
        content_location = StaticContent.compute_location(item.location.course_key, filename)
        try:
            # Only the existence of the file matters here; the data read is
            # either overwritten by the youtube lookup below or never used.
            local_transcripts = contentstore().find(content_location).data
            transcripts_presence['current_item_subs'] = item.sub
        except NotFoundError:
            pass

        # Check for youtube transcripts presence
        youtube_id = videos.get('youtube', None)
        if youtube_id:
            transcripts_presence['is_youtube_mode'] = True

            # youtube local
            filename = 'subs_{0}.srt.sjson'.format(youtube_id)
            content_location = StaticContent.compute_location(item.location.course_key, filename)
            try:
                local_transcripts = contentstore().find(content_location).data
                transcripts_presence['youtube_local'] = True
            except NotFoundError:
                log.debug(u"Can't find transcripts in storage for youtube id: %s", youtube_id)

            # youtube server
            # Work on a deep copy so the shared settings dict is not modified.
            youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
            youtube_text_api['params']['v'] = youtube_id
            youtube_transcript_name = youtube_video_transcript_name(youtube_text_api)
            if youtube_transcript_name:
                youtube_text_api['params']['name'] = youtube_transcript_name
            # NOTE(review): plain http and no `timeout` argument are used for this request.
            youtube_response = requests.get('http://' + youtube_text_api['url'], params=youtube_text_api['params'])

            if youtube_response.status_code == 200 and youtube_response.text:
                transcripts_presence['youtube_server'] = True
            #check youtube local and server transcripts for equality
            # `local_transcripts` is guaranteed to hold the youtube file here,
            # because 'youtube_local' is only set right after it was read.
            if transcripts_presence['youtube_server'] and transcripts_presence['youtube_local']:
                try:
                    youtube_server_subs = get_transcripts_from_youtube(
                        youtube_id,
                        settings,
                        item.runtime.service(item, "i18n")
                    )
                    if json.loads(local_transcripts) == youtube_server_subs:  # check transcripts for equality
                        transcripts_presence['youtube_diff'] = False
                except GetTranscriptsFromYouTubeException:
                    # Best effort: on failure 'youtube_diff' keeps its default of True.
                    pass

        # Check for html5 local transcripts presence
        html5_subs = []
        for html5_id in videos['html5']:
            filename = 'subs_{0}.srt.sjson'.format(html5_id)
            content_location = StaticContent.compute_location(item.location.course_key, filename)
            try:
                html5_subs.append(contentstore().find(content_location).data)
                transcripts_presence['html5_local'].append(html5_id)
            except NotFoundError:
                log.debug(u"Can't find transcripts in storage for non-youtube video_id: %s", html5_id)
            # Evaluated on every iteration; it compares the first two transcripts found.
            if len(html5_subs) == 2:  # check html5 transcripts for equality
                transcripts_presence['html5_equal'] = json.loads(html5_subs[0]) == json.loads(html5_subs[1])

        # The second value returned by `_transcripts_logic` is not needed here.
        command, __ = _transcripts_logic(transcripts_presence, videos)

    transcripts_presence.update({'command': command})
    return JsonResponse(transcripts_presence)
Exemplo n.º 16
0
def check_transcripts(request):
    """
    Check state of transcripts availability.

    request.GET['data'] has key `videos`, which can contain any of the following::

        [
            {u'type': u'youtube', u'video': u'OEoXaMPEzfM', u'mode': u'youtube'},
            {u'type': u'html5',    u'video': u'video1',             u'mode': u'mp4'},
            {u'type': u'html5',    u'video': u'video2',             u'mode': u'webm'}
        ]
        `type` is youtube or html5
        `video` is html5 or youtube video_id
        `mode` is youtube, mp4 or webm

    If the parsed videos contain an `edx_video_id`, an English transcript is
    looked up in VAL first; when it is found the command is 'found' and none of
    the contentstore/YouTube checks below are performed.

    Returns a JSON response built from the transcripts_presence dict::

        html5_local: list of html5 ids, if subtitles exist locally for them;
        html5_equal: bool, whether the first two html5 transcripts found locally have equal JSON content
            (stays False when fewer than two are found);
        is_youtube_mode: bool, if we have youtube_id, and as youtube mode is of higher priority, reflect this with flag;
        youtube_local: bool, if youtube transcripts exist locally;
        youtube_server: bool, if youtube transcripts exist on server;
        youtube_diff: bool, if youtube transcripts exist on youtube server, and are different from local youtube ones;
        current_item_subs: string, value of item.sub field;
        status: string, 'Error' or 'Success';
        subs: string, new value of item.sub field, that should be set in module
            (NOTE(review): this function does not set it itself -- confirm where it comes from);
        command: string, action to front-end what to do and what to show to user.
    """
    # Defaults describe the "nothing found" state; 'status' is flipped to
    # 'Success' once the request has been validated.
    transcripts_presence = {
        'html5_local': [],
        'html5_equal': False,
        'is_youtube_mode': False,
        'youtube_local': False,
        'youtube_server': False,
        'youtube_diff': True,
        'current_item_subs': None,
        'status': 'Error',
    }

    try:
        __, videos, item = _validate_transcripts_data(request)
    except TranscriptsRequestValidationException as e:
        return error_response(transcripts_presence, text_type(e))

    transcripts_presence['status'] = 'Success'

    try:
        # A transcript available in VAL takes precedence over everything else.
        edx_video_id = clean_video_id(videos.get('edx_video_id'))
        get_transcript_from_val(edx_video_id=edx_video_id, lang=u'en')
        command = 'found'
    except NotFoundError:
        # Nothing in VAL: inspect the contentstore (and YouTube) instead.
        filename = 'subs_{0}.srt.sjson'.format(item.sub)
        content_location = StaticContent.compute_location(
            item.location.course_key, filename)
        try:
            # Only the existence of the file matters here; the decoded data is
            # either overwritten by the youtube lookup below or never used.
            local_transcripts = contentstore().find(
                content_location).data.decode('utf-8')
            transcripts_presence['current_item_subs'] = item.sub
        except NotFoundError:
            pass

        # Check for youtube transcripts presence
        youtube_id = videos.get('youtube', None)
        if youtube_id:
            transcripts_presence['is_youtube_mode'] = True

            # youtube local
            filename = 'subs_{0}.srt.sjson'.format(youtube_id)
            content_location = StaticContent.compute_location(
                item.location.course_key, filename)
            try:
                local_transcripts = contentstore().find(
                    content_location).data.decode('utf-8')
                transcripts_presence['youtube_local'] = True
            except NotFoundError:
                log.debug(
                    u"Can't find transcripts in storage for youtube id: %s",
                    youtube_id)

            # youtube server
            # Work on a deep copy so the shared settings dict is not modified.
            youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
            youtube_text_api['params']['v'] = youtube_id
            youtube_transcript_name = youtube_video_transcript_name(
                youtube_text_api)
            if youtube_transcript_name:
                youtube_text_api['params']['name'] = youtube_transcript_name
            # NOTE(review): plain http and no `timeout` argument are used for this request.
            youtube_response = requests.get('http://' +
                                            youtube_text_api['url'],
                                            params=youtube_text_api['params'])

            if youtube_response.status_code == 200 and youtube_response.text:
                transcripts_presence['youtube_server'] = True
            #check youtube local and server transcripts for equality
            # `local_transcripts` is guaranteed to hold the youtube file here,
            # because 'youtube_local' is only set right after it was read.
            if transcripts_presence['youtube_server'] and transcripts_presence[
                    'youtube_local']:
                try:
                    youtube_server_subs = get_transcripts_from_youtube(
                        youtube_id, settings,
                        item.runtime.service(item, "i18n"))
                    if json.loads(
                            local_transcripts
                    ) == youtube_server_subs:  # check transcripts for equality
                        transcripts_presence['youtube_diff'] = False
                except GetTranscriptsFromYouTubeException:
                    # Best effort: on failure 'youtube_diff' keeps its default of True.
                    pass

        # Check for html5 local transcripts presence
        html5_subs = []
        for html5_id in videos['html5']:
            filename = 'subs_{0}.srt.sjson'.format(html5_id)
            content_location = StaticContent.compute_location(
                item.location.course_key, filename)
            try:
                # Stored undecoded here; decoding happens in the comparison below.
                html5_subs.append(contentstore().find(content_location).data)
                transcripts_presence['html5_local'].append(html5_id)
            except NotFoundError:
                log.debug(
                    u"Can't find transcripts in storage for non-youtube video_id: %s",
                    html5_id)
            # Evaluated on every iteration; it compares the first two transcripts found.
            if len(html5_subs) == 2:  # check html5 transcripts for equality
                transcripts_presence['html5_equal'] = (json.loads(
                    html5_subs[0].decode('utf-8')) == json.loads(
                        html5_subs[1].decode('utf-8')))

        # The second value returned by `_transcripts_logic` is not needed here.
        command, __ = _transcripts_logic(transcripts_presence, videos)

    transcripts_presence.update({'command': command})
    return JsonResponse(transcripts_presence)
Exemplo n.º 17
0
def async_migrate_transcript_subtask(self, *args, **kwargs):  # pylint: disable=unused-argument
    """
    Celery subtask that migrates one language transcript of one video.

    Arguments:
        args: exactly four positionals, unpacked as
            (video_location, revision, language_code, force_update).
        kwargs: must contain `command_run` (echoed in every log line) and
            `commit` (a falsy value means dry run: log and return).

    Returns:
        'Success' for a dry run or a completed migration, 'Failure' when
        NotFoundError, TranscriptsGenerationException or ValCannotCreateError
        is raised while migrating. Any other exception is logged and re-raised.
    """
    result_ok, result_failed = 'Success', 'Failure'
    video_location, revision, language_code, force_update = args
    command_run = kwargs['command_run']
    store = modulestore()
    video = store.get_item(
        usage_key=BlockUsageLocator.from_string(video_location),
        revision=revision,
    )
    edx_video_id = clean_video_id(video.edx_video_id)

    def log_args():
        # Evaluated at every call because edx_video_id may be replaced below.
        return (
            MIGRATION_LOGS_PREFIX, command_run, revision, video_location,
            edx_video_id, language_code,
        )

    if not kwargs['commit']:
        # Dry run: only report what would be migrated.
        LOGGER.info(
            '[%s] [run=%s] [video-transcript-will-be-migrated] '
            '[revision=%s] [video=%s] [edx_video_id=%s] [language_code=%s]',
            *log_args()
        )
        return result_ok

    LOGGER.info(
        '[%s] [run=%s] [transcripts-migration-process-started-for-video-transcript] [revision=%s] '
        '[video=%s] [edx_video_id=%s] [language_code=%s]',
        *log_args()
    )

    try:
        info = video.get_transcripts_info()
        transcript_content, __, __ = get_transcript_from_contentstore(
            video=video,
            language=language_code,
            output_format=Transcript.SJSON,
            transcripts_info=info,
        )

        # Missing id, or an id that is_video_available() rejects: obtain a
        # fresh external video id and persist it on the video.
        if not (edx_video_id and is_video_available(edx_video_id)):
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id

            # Write to the branch that matches the requested revision.
            if revision == ModuleStoreEnum.RevisionOption.published_only:
                branch = ModuleStoreEnum.Branch.published_only
            else:
                branch = ModuleStoreEnum.Branch.draft_preferred
            with store.branch_setting(branch):
                store.update_item(video, ModuleStoreEnum.UserID.mgmt_command)

            LOGGER.info(
                '[%s] [run=%s] [generated-edx-video-id] [revision=%s] [video=%s] [edx_video_id=%s] [language_code=%s]',
                *log_args()
            )

        save_transcript_to_storage(
            command_run=command_run,
            edx_video_id=edx_video_id,
            language_code=language_code,
            transcript_content=transcript_content,
            file_format=Transcript.SJSON,
            force_update=force_update,
        )
    except (NotFoundError, TranscriptsGenerationException, ValCannotCreateError):
        # Known failure modes are reported through the return value.
        LOGGER.exception(
            '[%s] [run=%s] [video-transcript-migration-failed-with-known-exc] [revision=%s] [video=%s] '
            '[edx_video_id=%s] [language_code=%s]',
            *log_args()
        )
        return result_failed
    except Exception:
        # Anything unexpected is logged and propagated to the caller.
        LOGGER.exception(
            '[%s] [run=%s] [video-transcript-migration-failed-with-unknown-exc] [revision=%s] '
            '[video=%s] [edx_video_id=%s] [language_code=%s]',
            *log_args()
        )
        raise
    else:
        LOGGER.info(
            '[%s] [run=%s] [video-transcript-migration-succeeded-for-a-video] [revision=%s] '
            '[video=%s] [edx_video_id=%s] [language_code=%s]',
            *log_args()
        )
        return result_ok
Exemplo n.º 18
0
def async_migrate_transcript_subtask(self, *args, **kwargs):  # pylint: disable=unused-argument
    """
    Migrates a transcript of a given video in a course as a new celery task.

    Arguments:
        args: exactly three positionals, unpacked as
            (video_location, language_code, force_update).
        kwargs: must contain `commit`; a falsy value means dry run.

    Returns:
        A status message string: a "will be migrated" notice for a dry run,
        a "Failed: ..." message when NotFoundError,
        TranscriptsGenerationException or ValCannotCreateError is raised, or
        a "Success: ..." message once the transcript has been saved.

    Raises:
        Any other exception raised during the migration, after logging it.
    """
    video_location, language_code, force_update = args
    store = modulestore()
    video = store.get_item(usage_key=BlockUsageLocator.from_string(video_location))
    commit = kwargs['commit']

    if not commit:
        # Dry run: report what would happen without touching any storage.
        return 'Language {language_code} transcript of video {edx_video_id} will be migrated'.format(
            language_code=language_code,
            edx_video_id=video.edx_video_id
        )

    # Start transcript's migration
    edx_video_id = clean_video_id(video.edx_video_id)

    LOGGER.info(
        "[Transcript migration] migration process is started for video [%s] language [%s].",
        edx_video_id, language_code
    )

    try:
        transcripts_info = video.get_transcripts_info()
        transcript_content, _, _ = get_transcript_from_contentstore(
            video=video,
            language=language_code,
            output_format=Transcript.SJSON,
            transcripts_info=transcripts_info,
        )

        if not edx_video_id:
            # The video has no usable id yet: obtain an external one and
            # persist it on the video before saving the transcript.
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id
            store.update_item(video, ModuleStoreEnum.UserID.mgmt_command)

        save_transcript_to_storage(
            edx_video_id=edx_video_id,
            language_code=language_code,
            transcript_content=transcript_content,
            file_format=Transcript.SJSON,
            force_update=force_update,
        )
    except (NotFoundError, TranscriptsGenerationException, ValCannotCreateError) as exc:
        # Known failure modes are reported through the returned message.
        LOGGER.exception(
            '[Transcript migration] transcript migration failed for video [%s] and language [%s].',
            edx_video_id, language_code
        )
        message = 'Failed: language {language} of video {video} with exception {exception}'.format(
            language=language_code,
            video=video.edx_video_id,
            exception=text_type(exc)
        )
    except Exception:
        # Anything unexpected is logged and propagated to the caller.
        LOGGER.exception(
            '[Transcript migration] transcript migration failed for video [%s] and language [%s].',
            edx_video_id, language_code
        )
        raise
    else:
        # text_type (already used above for the exception text) replaces the
        # Python 2-only `unicode` builtin, which is undefined on Python 3.
        message = (
            'Success: transcript (language: {language_code}, edx_video_id: {edx_video_id}) has been migrated '
            'for video [{location}].'
        ).format(edx_video_id=edx_video_id, language_code=language_code, location=text_type(video.location))

    return message
Exemplo n.º 19
0
def async_migrate_transcript_subtask(self, *args, **kwargs):  # pylint: disable=unused-argument
    """
    Run the migration of a single video transcript as a celery subtask.

    Arguments:
        args: four positionals, in order: video_location, revision,
            language_code, force_update.
        kwargs: `command_run` (included in each log line) and `commit`
            (when falsy, nothing is migrated and only a log line is written).

    Returns:
        'Success' after a dry run or a finished migration; 'Failure' if
        NotFoundError, TranscriptsGenerationException or ValCannotCreateError
        is raised. Other exceptions are logged and re-raised.
    """
    outcome_success, outcome_failure = 'Success', 'Failure'
    video_location, revision, language_code, force_update = args
    command_run = kwargs['command_run']
    store = modulestore()
    video = store.get_item(
        usage_key=BlockUsageLocator.from_string(video_location),
        revision=revision,
    )
    edx_video_id = clean_video_id(video.edx_video_id)

    def _context():
        # Rebuilt per log call so a regenerated edx_video_id is picked up.
        return (MIGRATION_LOGS_PREFIX, command_run, revision, video_location, edx_video_id, language_code)

    if not kwargs['commit']:
        # Dry run.
        LOGGER.info(
            ('[%s] [run=%s] [video-transcript-will-be-migrated] '
             '[revision=%s] [video=%s] [edx_video_id=%s] [language_code=%s]'),
            *_context()
        )
        return outcome_success

    LOGGER.info(
        ('[%s] [run=%s] [transcripts-migration-process-started-for-video-transcript] [revision=%s] '
         '[video=%s] [edx_video_id=%s] [language_code=%s]'),
        *_context()
    )

    try:
        available_transcripts = video.get_transcripts_info()
        transcript_content, __, __ = get_transcript_from_contentstore(
            video=video,
            language=language_code,
            output_format=Transcript.SJSON,
            transcripts_info=available_transcripts,
        )

        needs_new_video_id = not (edx_video_id and is_video_available(edx_video_id))
        if needs_new_video_id:
            edx_video_id = create_external_video('external-video')
            video.edx_video_id = edx_video_id

            # Pick the modulestore branch that corresponds to the revision.
            if revision == ModuleStoreEnum.RevisionOption.published_only:
                target_branch = ModuleStoreEnum.Branch.published_only
            else:
                target_branch = ModuleStoreEnum.Branch.draft_preferred

            with store.branch_setting(target_branch):
                store.update_item(video, ModuleStoreEnum.UserID.mgmt_command)

            LOGGER.info(
                '[%s] [run=%s] [generated-edx-video-id] [revision=%s] [video=%s] [edx_video_id=%s] [language_code=%s]',
                *_context()
            )

        save_transcript_to_storage(
            command_run=command_run,
            edx_video_id=edx_video_id,
            language_code=language_code,
            transcript_content=transcript_content,
            file_format=Transcript.SJSON,
            force_update=force_update,
        )
    except (NotFoundError, TranscriptsGenerationException, ValCannotCreateError):
        # Expected failure modes: log with traceback and report 'Failure'.
        LOGGER.exception(
            ('[%s] [run=%s] [video-transcript-migration-failed-with-known-exc] [revision=%s] [video=%s] '
             '[edx_video_id=%s] [language_code=%s]'),
            *_context()
        )
        return outcome_failure
    except Exception:
        # Unexpected failure: log with traceback, then let it propagate.
        LOGGER.exception(
            ('[%s] [run=%s] [video-transcript-migration-failed-with-unknown-exc] [revision=%s] '
             '[video=%s] [edx_video_id=%s] [language_code=%s]'),
            *_context()
        )
        raise
    else:
        LOGGER.info(
            ('[%s] [run=%s] [video-transcript-migration-succeeded-for-a-video] [revision=%s] '
             '[video=%s] [edx_video_id=%s] [language_code=%s]'),
            *_context()
        )
        return outcome_success