Пример #1
0
def test_youtubesubtitle_process_file(youtube_video_with_subs_dict):
    """Check that processing a YouTube subtitle file yields a plain ``.vtt`` name.

    The fixture dict supplies the ``youtube_id`` and a list of
    ``subtitles_langs``; the first listed language is used. The processed
    filename must end in ``.vtt`` and must not end in ``.<lang>.vtt``.
    """
    video_info = youtube_video_with_subs_dict
    youtube_id = video_info['youtube_id']
    first_lang = video_info['subtitles_langs'][0]

    subtitle = YouTubeSubtitleFile(youtube_id=youtube_id, language=first_lang)
    result = subtitle.process_file()

    assert result is not None, 'Processing YouTubeSubtitleFile file failed'
    assert result.endswith('.vtt'), 'Wrong extenstion for video subtitles'
    # The language code must not leak into the extension (e.g. "x.en.vtt").
    assert not result.endswith('.' + first_lang + '.vtt'), 'Lang code in extension'
def make_youtube_video(tubeid, name, _id):
    """Build a ``VideoNode`` for a YouTube video together with English subtitles.

    Args:
        tubeid: YouTube id handed to both the video and the subtitle file.
        name: Title of the resulting node.
        _id: Source identifier; converted with ``str()`` for the node. Values
            that are not already ``str`` are printed along with their type.

    Returns:
        The new ``VideoNode``, or ``None`` when the video file object is
        ``None``.
    """
    video = YouTubeVideoFile(
        youtube_id=tubeid,
        language=getlang('en').code,
        high_resolution=False,
    )
    if video is None:
        print("No video.")
        return None

    subtitles = YouTubeSubtitleFile(
        youtube_id=tubeid,
        language=getlang('en').code,
    )

    if not isinstance(_id, str):
        # Debug output for ids that had to be coerced to a string.
        print(_id, type(_id))

    # Author and description are not set on this node.
    return VideoNode(
        source_id=str(_id),
        title=name,
        language=getlang('en').code,
        license=LICENCE,
        files=[video, subtitles],
    )
Пример #3
0
def videoAssignment(material):
    """Create a ``VideoNode`` for the YouTube video attached to *material*.

    ``material["youtubeVideo"]`` provides the ``"id"`` (used both as the
    node's source_id and as the YouTube id of its files) and the ``"title"``.

    Returns:
        A ``VideoNode`` holding the low-resolution video and English subtitles.
    """
    youtube_video = material["youtubeVideo"]
    youtube_id = youtube_video["id"]  # usually set source_id to youtube_id
    node_title = youtube_video["title"]

    cc_by = get_license(licenses.CC_BY,
                        copyright_holder='Copyright holder name')
    english = getlang('en').id

    media_files = [
        YouTubeVideoFile(youtube_id=youtube_id,
                         high_resolution=False,
                         language='en'),
        YouTubeSubtitleFile(youtube_id=youtube_id, language='en'),
    ]

    return VideoNode(
        source_id=youtube_id,
        title=node_title,
        license=cc_by,
        language=english,
        derive_thumbnail=True,  # video-specific flag
        thumbnail=None,
        files=media_files,
    )
def youtubeNode(url):
    """Create a ``VideoNode`` for the YouTube watch page at *url*.

    The video id is taken from the ``v`` query parameter of *url*; the node
    title is the text of the ``<title>`` element of the downloaded page.

    Args:
        url: Watch-page URL containing a ``v=<video id>`` query parameter.

    Returns:
        A ``VideoNode`` whose files are the low-resolution YouTube video and
        its English subtitles.

    Raises:
        KeyError: If *url* has no ``v`` query parameter.
    """
    # Imported locally under an alias: the previous code called ``urlparse``
    # both as a function and as a module (``urlparse.parse_qs``), which cannot
    # work for any single binding of that name.
    from urllib import parse as url_parser

    # Pick the YouTube video id out of the URL's query string.
    query = url_parser.parse_qs(url_parser.urlparse(url).query)
    videoID = query["v"][0]

    page_html = requests.get(url).text

    # Read the title text straight from the <title> element. This keeps any
    # square brackets that belong to the title and decodes HTML entities;
    # a page without a <title> yields an empty title.
    soup = bs4.BeautifulSoup(page_html, "html.parser")
    title_tag = soup.find('title')
    newTitle = title_tag.get_text() if title_tag is not None else ''

    video_node = VideoNode(
        source_id=videoID,  # usually set source_id to youtube_id
        title=str(newTitle),
        license=get_license(licenses.CC_BY,
                            copyright_holder='Copyright holder name'),
        language=getlang('en').id,
        derive_thumbnail=True,  # video-specific flag
        thumbnail=None,
        files=[
            YouTubeVideoFile(youtube_id=videoID,
                             high_resolution=False,
                             language='en'),
            YouTubeSubtitleFile(youtube_id=videoID, language='en')
        ])

    return video_node
    def add_lesson_video(self, lesson, url, title, course_title, module_title):
        """Attach a YouTube video node, with its supported subtitles, to *lesson*.

        The lesson page is read from a local file under ``files/`` (built from
        the course, module, lesson and video titles); *url* is not used.

        Args:
            lesson: Parent node; its ``title`` and ``source_id`` are read and
                the new video node is added to it with ``add_child``.
            url: Unused by this method.
            title: Title of the video node, also part of the local file name.
            course_title: Course directory name under ``files/``.
            module_title: Module directory name under the course directory.
        """
        LOGGER.info("Adding video for the course {}...".format(lesson.title))
        file_path = "files/{}/{}/{}/{}-video.txt".format(
            course_title, module_title, lesson.title, title)

        # The page is parsed while the file is open; everything afterwards
        # works on the parsed tree, so the handle is released before the
        # metadata lookup below.
        with open(file_path, "r") as resp:
            page = BeautifulSoup(resp, "html.parser")

        video_id = page.find(
            "div", {"youtube-api": "lesson.youtubeApi"})["video-id"]
        source_id = "{}-video".format(lesson.source_id)

        video_file = YouTubeVideoFile(youtube_id=video_id,
                                      high_resolution=True,
                                      language=CHANNEL_LANGUAGE)
        video_node = VideoNode(
            source_id=source_id,
            title=title,
            license=CC_BY_NC_SALicense(
                copyright_holder="Google Garage Digital"),
            language=CHANNEL_LANGUAGE,
            files=[video_file],
        )

        # Add subtitles for the video. A missing or empty "subtitles" entry
        # simply means no subtitle files are attached.
        info = ydl.extract_info(video_id, download=False)
        subtitles = (info or {}).get("subtitles") or {}
        for lang_code in subtitles:
            if is_youtube_subtitle_file_supported_language(lang_code):
                video_node.add_file(
                    YouTubeSubtitleFile(youtube_id=video_id,
                                        language=lang_code))
            else:
                # The placeholder is required: logging %-formats the message
                # with its extra arguments, and a message without one fails
                # to format and the language code is never logged.
                LOGGER.info('Unsupported subtitle language code: %s',
                            lang_code)

        lesson.add_child(video_node)
Пример #6
0
    def create_content_nodes(self, channel):
        """
        Populate *channel* with a sample tree of folders and content nodes.

        A "Content Nodes" folder is added to the channel, and beneath it one
        folder each for audio, documents, HTML5 apps and videos. The hierarchy
        is built with `add_child`; every content node gets one or more files,
        either through its `files` argument or through `add_file`.
        """

        def make_folder(source_id, title):
            # All folders share the same placeholder metadata.
            return TopicNode(
                source_id=source_id,
                title=title,
                description='Put folder description here',
                author=None,
                language=getlang('en').id,
                thumbnail=None,
            )

        def cc_by(holder):
            # CC BY license object for the given copyright holder.
            return get_license(licenses.CC_BY, copyright_holder=holder)

        root_folder = make_folder('uniqid001', 'Content Nodes')
        channel.add_child(root_folder)

        # ---- Audio ----
        audio_folder = make_folder('uniqid002', 'Audio Files Folder')
        root_folder.add_child(audio_folder)

        whale_sounds = AudioNode(
            source_id='uniqid003',
            title='Whale sounds',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=getlang('en').id,
            license=cc_by('Copyright holder name'),
            thumbnail=None,
            files=[],
        )
        audio_folder.add_child(whale_sounds)
        # The audio file is attached after the node has joined the tree.
        whale_sounds.add_file(
            AudioFile(
                # note path can also be a URL
                path='./content/ricecooker-channel-files/Whale_sounds.mp3',
                language=getlang('en').id,
            )
        )

        # ---- Documents ----
        docs_folder = make_folder('uniqid004', 'Document Nodes')
        root_folder.add_child(docs_folder)

        brown_ruling = DocumentNode(
            source_id='uniqid005',
            title='The Supreme Court\u2019s Ruling in Brown vs. Board of Education',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=getlang('en').id,
            license=cc_by('Copyright holder name'),
            thumbnail=None,
            files=[
                DocumentFile(
                    path='./content/ricecooker-channel-files/brown-vs-board-of-education.pdf',
                    language=getlang('en').id,
                ),
            ],
        )
        docs_folder.add_child(brown_ruling)

        # ---- HTML5 apps ----
        apps_folder = make_folder('uniqid006', 'HTML5App Nodes')
        root_folder.add_child(apps_folder)

        capabilities_app = HTML5AppNode(
            source_id='uniqid007',
            title='HTMLWeb capabilities test',
            author='First Last (author\'s name)',
            description='Tests different HTML/JS capabilities. What capabilities are allowed and disallowed by the sandboxed iframe used to render HTML5App nodes on Kolibri.',
            language=getlang('en').id,
            license=cc_by('Copyright holder name'),
            thumbnail='./content/ricecooker-channel-files/html5_tests.jpg',
            files=[
                HTMLZipFile(
                    path='./content/ricecooker-channel-files/html5_tests.zip',
                    language=getlang('en').id,
                ),
            ],
        )
        apps_folder.add_child(capabilities_app)

        vuejs_app = HTML5AppNode(
            source_id='uniqid008',
            title='Sample Vue.js app',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=getlang('en').id,
            license=cc_by('Copyright holder name'),
            thumbnail='./content/ricecooker-channel-files/html5_vuejs.jpg',
            files=[
                HTMLZipFile(
                    path='./content/ricecooker-channel-files/html5_vuejs.zip',
                    language=getlang('en').id,
                ),
            ],
        )
        apps_folder.add_child(vuejs_app)

        # ---- Videos ----
        videos_folder = make_folder('uniqid009', 'Video Nodes')
        root_folder.add_child(videos_folder)

        local_video = VideoNode(
            source_id='uniqid010',
            title='Wave particle duality explained in 2 mins',
            author='First Last (author\'s name)',
            description='Put file description here',
            language=getlang('en').id,
            license=cc_by('Copyright holder name'),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=[
                VideoFile(
                    path='./content/ricecooker-channel-files/Wave_particle_duality.mp4',
                    language=getlang('en').id,
                ),
            ],
        )
        videos_folder.add_child(local_video)

        youtube_id = 'VJyk81HmcZQ'
        youtube_video = VideoNode(
            source_id=youtube_id,  # usually set source_id to youtube_id
            title='Estimating division that results in non whole numbers',
            author='Sal Khan',
            description='Video description would go here',
            language=getlang('en').id,
            license=cc_by('Khan Academy'),
            derive_thumbnail=True,  # video-specific flag
            thumbnail=None,
            files=[
                YouTubeVideoFile(youtube_id=youtube_id,
                                 high_resolution=False,
                                 language='en'),
                YouTubeSubtitleFile(youtube_id=youtube_id, language='ko'),
            ],
        )
        videos_folder.add_child(youtube_video)
Пример #7
0
def _short_description(node):
    """Return the node's ``description`` cut to 400 characters ('' if absent)."""
    return (node.get("description") or '')[:400]


def _exercise_preview_url(base_path, node_path):
    """Build the exercise preview URL: the slug segment becomes ``e/<slug>``."""
    # Remove only a literal leading 'khan'. str.strip('khan') treats its
    # argument as a set of characters and would also eat any leading or
    # trailing 'k', 'h', 'a' or 'n' of an unrelated path.
    if node_path.startswith('khan'):
        node_path = node_path[len('khan'):]

    segments = (base_path + node_path).split('/')
    slug = segments[-2]
    # Swap just the slug segment for 'e'. A plain str.replace(slug, 'e')
    # would also rewrite the same text wherever else it occurs in the URL.
    segments[-2] = 'e'
    return '/'.join(segments) + slug


def _localize_image_urls(item_data):
    """Rewrite every FILE_URL_REGEX match in *item_data* to its local path."""

    def to_local(match):
        unescaped = match.group(0).replace('\\', '')
        return unescaped.replace(REPLACE_STRING, IMAGE_DL_LOCATION)

    # A callable replacement is inserted literally (no template escapes are
    # interpreted) and each match is rewritten exactly once, in a single pass.
    return re.sub(FILE_URL_REGEX, to_local, item_data)


def create_node(node, assessment_dict, base_path, lite_version, lang_code):
    """Convert one source-tree entry into the matching content node.

    Args:
        node: Dict describing the entry; ``node['kind']`` selects the node
            type ('Exercise', 'Topic' or 'Video').
        assessment_dict: Maps assessment item ids to dicts with an
            ``"item_data"`` string. For exercises, the ``"item_data"`` of each
            referenced item is rewritten in place so image references point
            to local paths.
        base_path: Prefix joined with the node's path to form preview URLs.
        lite_version: When true, questions get no ``source_url``.
        lang_code: Language code looked up with ``getlang`` for the video
            subtitle file.

    Returns:
        An ``ExerciseNode``, ``TopicNode`` or ``VideoNode``, or ``None`` for
        any other kind.
    """
    kind = node.get('kind')

    # Exercise node creation
    if kind == 'Exercise':
        child_node = ExerciseNode(
            source_id=node['id'],
            title=node['title'],
            exercise_data={
                'mastery_model': node.get('suggested_completion_criteria')
            },
            description=_short_description(node),
            license=licenses.ALL_RIGHTS_RESERVED,
            thumbnail=node.get('image_url_256'),
        )

        # build exercise urls for previews
        full_path = _exercise_preview_url(base_path, node.get('path'))
        source_url = None if lite_version else full_path

        # attach Perseus questions to Exercises
        for item in node['all_assessment_items']:
            assessment = assessment_dict[item['id']]
            # replace all references to assessment images with the local
            # file path to the image
            assessment["item_data"] = _localize_image_urls(
                assessment["item_data"])
            question = PerseusQuestion(
                id=item['id'],
                raw_data=assessment['item_data'],
                source_url=source_url,
            )
            child_node.add_question(question)

    # Topic node creation
    elif kind == 'Topic':
        child_node = TopicNode(
            source_id=node["id"],
            title=node["title"],
            description=_short_description(node))

    # Video node creation
    elif kind == 'Video':
        # Prefer the HTML description (converted to text) over the plain one.
        if node.get('description_html'):
            video_description = html2text(node.get('description_html'))[:400]
        elif node.get('description'):
            video_description = node.get('description')[:400]
        else:
            video_description = ''
        # standard download url for KA videos
        download_url = "https://cdn.kastatic.org/KA-youtube-converted/{0}.mp4/{1}.mp4".format(
            node['youtube_id'], node['youtube_id'])
        files = [VideoFile(download_url)]
        files.append(
            YouTubeSubtitleFile(node['youtube_id'],
                                language=getlang(lang_code)))
        child_node = VideoNode(source_id=node["id"],
                               title=node["title"],
                               description=video_description,
                               files=files,
                               thumbnail=node.get('image_url'),
                               license=licenses.CC_BY_NC_SA)

    else:  # unknown content file format
        return None

    return child_node