Exemplo n.º 1
0
 def test_xpath_text(self):
     # Fixture: a single <p> element nested in a <div> below the root.
     testxml = '''<root>
         <div>
             <p>Foo</p>
         </div>
     </root>'''
     root = xml.etree.ElementTree.fromstring(testxml)

     # A path that resolves to an element yields that element's text.
     found = xpath_text(root, 'div/p')
     self.assertEqual(found, 'Foo')

     # A path that resolves to nothing yields None by default ...
     missing = xpath_text(root, 'div/bar')
     self.assertTrue(missing is None)

     # ... and must raise ExtractorError when fatal=True is requested.
     self.assertRaises(
         ExtractorError, xpath_text, root, 'div/bar', fatal=True)
Exemplo n.º 2
0
 def test_xpath_text(self):
     # Fixture: <root><div><p>Foo</p></div></root>, parsed once and reused.
     testxml = '''<root>
         <div>
             <p>Foo</p>
         </div>
     </root>'''
     tree = xml.etree.ElementTree.fromstring(testxml)
     existing_path = 'div/p'
     absent_path = 'div/bar'

     # Text of an element that exists.
     self.assertEqual(xpath_text(tree, existing_path), 'Foo')

     # An absent element: None in the default mode, ExtractorError when
     # the caller asks for fatal=True.
     lookup_result = xpath_text(tree, absent_path)
     self.assertTrue(lookup_result is None)
     self.assertRaises(
         ExtractorError, xpath_text, tree, absent_path, fatal=True)
Exemplo n.º 3
0
 def test_xpath_text(self):
     # Fixture: a single <p> element nested in a <div> below the root.
     testxml = """<root>
         <div>
             <p>Foo</p>
         </div>
     </root>"""
     root = compat_etree_fromstring(testxml)
     present, absent = "div/p", "div/bar"

     # An element that exists yields its text.
     self.assertEqual(xpath_text(root, present), "Foo")

     # An absent element yields the supplied default, or None without one.
     fallback = xpath_text(root, absent, default="default")
     self.assertEqual(fallback, "default")
     self.assertTrue(xpath_text(root, absent) is None)

     # With fatal=True an absent element must raise ExtractorError.
     self.assertRaises(
         ExtractorError, xpath_text, root, absent, fatal=True)
Exemplo n.º 4
0
    def run(self, dl_url: str, add_webcam: bool, add_annotations: bool,
            add_cursor, keep_tmp_files: bool, filename: str):
        """Download a recorded presentation and mux it into one video file.

        The method downloads ``metadata.xml``, ``shapes.svg`` and
        ``cursor.xml`` from ``<website>/presentation/<id>/``, fetches every
        image referenced by ``shapes.svg``, downloads the webcam and
        deskshare recordings (``.webm`` first, ``.mp4`` as fallback), builds
        a slideshow and finally muxes it through ``self.ffmpeg``.

        Args:
            dl_url: URL of the recording. It is matched against
                ``self._VALID_URL``, which must provide the ``id`` and
                ``website`` groups.
            add_webcam: If true the result is produced with
                ``mux_slideshow_with_webcam``, otherwise with
                ``mux_slideshow``.
            add_annotations: If true the slides are passed through
                ``self._add_annotations``.
            add_cursor: If true the slides are passed through
                ``self._add_cursor`` together with the parsed cursor.xml.
            keep_tmp_files: If false the temporary directory is removed
                after muxing.
            filename: Path of the resulting file. When ``None`` the name is
                built from the recording start time and the meeting name.

        Returns:
            None.

        Raises:
            AttributeError: If ``dl_url`` does not match ``self._VALID_URL``
                (``re.match`` returns ``None``).

        Note:
            ``DownloadError`` raised while fetching the webcam/deskshare
            recordings is handled here; any other exception propagates.
            ``self.ydl.params['outtmpl']`` is overwritten and not restored.
        """
        m_obj = re.match(self._VALID_URL, dl_url)

        video_id = m_obj.group('id')
        video_website = m_obj.group('website')
        # Every resource of the recording lives below this URL.
        video_base_url = video_website + '/presentation/' + video_id

        self.to_screen("Downloading meta informations")
        # Make sure the lesson exists
        self._download_webpage(dl_url, video_id)
        self._create_tmp_dir(video_id)

        # Extract basic metadata
        metadata = self._download_xml(video_base_url + '/metadata.xml',
                                      video_id)
        shapes = self._download_xml(video_base_url + '/shapes.svg', video_id)
        cursor_infos = self._download_xml(video_base_url + '/cursor.xml',
                                          video_id)

        # Parse metadata.xml
        meta = metadata.find('./meta')
        start_time = xpath_text(metadata, 'start_time')
        recording_duration = float(xpath_text(
            metadata, './playback/duration')) / 1000.0  # in seconds
        title = xpath_text(meta, 'meetingName')

        # The version is only printed for information, so failing to read it
        # must not abort the download - but it should not go unnoticed.
        try:
            bbb_origin_version = xpath_text(meta, 'bbb-origin-version')
            if bbb_origin_version is not None:
                bbb_version = bbb_origin_version.split(' ')[0]
                self.to_screen("BBB version: " + bbb_version)
        except Exception:
            self.report_warning("Could not determine the BBB version")

        # Downloading Slides
        # xml_find_rec fills `images` in place (presumably by walking the
        # whole tree recursively - confirm against the helper).
        images = []
        self.xml_find_rec(shapes, _s('svg:image'), images)
        slides_infos = []
        bonus_images = []
        # The same image may be shown several times; it gets one file name.
        img_path_to_filename = {}
        counter = 0
        for image in images:
            img_path = image.get(_x('xlink:href'))
            image_url = video_base_url + '/' + img_path
            # Sizes may be written as floats, hence int(float(...)).
            image_width = int(float(image.get('width')))
            image_height = int(float(image.get('height')))

            # Images that are not slides are only downloaded, they do not
            # take part in the slide timeline.
            if image.get('class') != 'slide':
                image_filename = image_url.split('/')[-1]
                image_path = video_id + '/' + image_filename
                bonus_images.append(
                    BonusImage(
                        image_url,
                        image_filename,
                        image_path,
                        image_width,
                        image_height,
                    ))
                continue

            image_id = image.get('id')
            slide_annotations = shapes.find(
                _s("./svg:g[@image='{}']".format(image_id)))

            if img_path.endswith('deskshare.png'):
                # The placeholder image stands for the deskshare video.
                image_url = video_base_url + '/deskshare/deskshare.webm'
                slide_filename = 'deskshare.webm'
            elif img_path not in img_path_to_filename:
                slide_filename = 'slide-{:03d}'.format(
                    counter) + '.' + determine_ext(img_path)
                img_path_to_filename[img_path] = slide_filename
                counter += 1
            else:
                slide_filename = img_path_to_filename[img_path]

            slide_path = video_id + '/' + slide_filename
            slide_ts_in = float(image.get('in'))
            slide_ts_out = float(image.get('out'))
            # Clamp to [0, time left in the recording] so a slide never
            # lasts longer than the recording itself.
            slide_ts_duration = max(
                0.0,
                min(recording_duration - slide_ts_in,
                    slide_ts_out - slide_ts_in))

            slides_infos.append(
                Slide(
                    image_id,
                    image_url,
                    slide_filename,
                    slide_path,
                    image_width,
                    image_height,
                    slide_ts_in,
                    slide_ts_out,
                    slide_ts_duration,
                    slide_annotations,
                ))

        # We now change the xml tree, all hrefs of all images now point to local files
        href_attr = _x('xlink:href')
        for image in images:
            image.attrib[href_attr] = (
                video_id + '/' + image.attrib[href_attr].split('/')[-1])

        self.to_screen("Downloading slides")
        self._write_slides(slides_infos, self.ydl)
        self._write_slides(bonus_images, self.ydl)
        if add_annotations:
            slides_infos = self._add_annotations(slides_infos)
        if add_cursor:
            slides_infos = self._add_cursor(slides_infos, cursor_infos)

        # Downloading Webcam / Deskshare
        def fetch_recording(subdir, stem, failure_message):
            # Download <stem>.webm and fall back to <stem>.mp4. Returns the
            # local path of the downloaded file, or None (after printing
            # failure_message) when both attempts raised DownloadError.
            self.to_screen("Downloading " + stem + ".webm")
            for ext in ('webm', 'mp4'):
                local_path = video_id + '/' + stem + '.' + ext
                try:
                    recording_dl = {
                        'id': video_id,
                        'title': title,
                        'url': (video_base_url + '/' + subdir + '/' + stem
                                + '.' + ext),
                        'timestamp': int(start_time),
                    }
                    self.ydl.params['outtmpl'] = local_path
                    self.ydl.process_ie_result(recording_dl)
                    return local_path
                except DownloadError:
                    if ext == 'webm':
                        self.to_screen(
                            "Downloading " + stem
                            + ".webm failed! Downloading " + stem
                            + ".mp4 instead")
            self.to_screen(failure_message)
            return None

        # Unless verbose, silence the downloader's stderr while the
        # (expected) download failures happen. The original method is
        # restored in `finally` so an unexpected exception cannot leave the
        # downloader muted.
        silence_ydl = not self.verbose
        if silence_ydl:
            self.ydl.to_stderr_backup = self.ydl.to_stderr
            self.ydl.to_stderr = types.MethodType(dummy_to_stderr, self.ydl)
        try:
            webcams_path = fetch_recording(
                'video', 'webcams', "Error: Downloading webcams.mp4 failed!")
            # The returned path is not needed later in this method.
            fetch_recording(
                'deskshare', 'deskshare',
                "Warning: Downloading deskshare.mp4 failed - No desk was likely shared in this session."
            )
        finally:
            if silence_ydl:
                self.ydl.to_stderr = self.ydl.to_stderr_backup

        # Post processing
        slideshow_w, slideshow_h = self._rescale_slides(slides_infos)

        slideshow_path = self._create_slideshow(slides_infos, video_id,
                                                slideshow_w, slideshow_h)

        if filename is not None:
            result_path = filename
        else:
            # start_time is divided by 1000 before being used as a POSIX
            # timestamp; '/' in the title would otherwise create
            # directories in the output path.
            formatted_date = datetime.fromtimestamp(
                int(start_time) / 1000).strftime('%Y-%m-%dT%H-%M-%S')
            result_path = formatted_date + '_' + title.replace(
                '/', '_') + '.mp4'

        self.to_screen("Mux Slideshow")
        webcam_w, webcam_h = self._get_webcam_size(slideshow_w, slideshow_h)

        # NOTE(review): this early return also skips the temporary-directory
        # cleanup below - confirm that this is intended.
        if os.path.isfile(result_path):
            self.report_warning("Final Slideshow already exists. Abort!")
            return

        if add_webcam:
            self.ffmpeg.mux_slideshow_with_webcam(slideshow_path, webcams_path,
                                                  webcam_w, webcam_h,
                                                  result_path)
        else:
            self.ffmpeg.mux_slideshow(slideshow_path, webcams_path,
                                      result_path)

        if not keep_tmp_files:
            self.to_screen("Cleanup")
            self._remove_tmp_dir(video_id)
Exemplo n.º 5
0
    def run(self, dl_url: str, add_webcam: bool, add_annotations: bool, add_cursor, keep_tmp_files: bool, filename: str):
        """Download a recorded presentation and mux it into one video file.

        The method downloads ``metadata.xml``, ``shapes.svg`` and
        ``cursor.xml`` from ``<website>/presentation/<id>/``, fetches the
        slide images referenced by ``shapes.svg``, tries to download
        ``webcams.webm`` and ``deskshare.webm``, builds a slideshow and
        finally muxes it through ``self.ffmpeg``.

        Args:
            dl_url: URL of the recording. It is matched against
                ``self._VALID_URL_RE``, which must provide the ``id`` and
                ``website`` groups.
            add_webcam: If true the result is produced with
                ``mux_slideshow_with_webcam``, otherwise with
                ``mux_slideshow``.
            add_annotations: If true the slides are passed through
                ``self._add_annotations``.
            add_cursor: If true the slides are passed through
                ``self._add_cursor`` together with the parsed cursor.xml.
            keep_tmp_files: If false the temporary directory is removed
                after muxing.
            filename: Path of the resulting file. When ``None`` the name is
                built from the recording start time and the meeting name.

        Returns:
            None.

        Raises:
            AttributeError: If ``dl_url`` does not match
                ``self._VALID_URL_RE`` (``match`` returns ``None``).

        Note:
            ``DownloadError`` raised while fetching the webcam/deskshare
            recordings is reported and otherwise ignored; any other
            exception propagates. ``self.ydl.params['outtmpl']`` is
            overwritten and not restored.
        """
        m_obj = self._VALID_URL_RE.match(dl_url)

        video_id = m_obj.group('id')
        video_website = m_obj.group('website')
        # Every resource of the recording lives below this URL.
        video_base_url = video_website + '/presentation/' + video_id

        self.to_screen("Downloading meta informations")
        # Make sure the lesson exists
        self._download_webpage(dl_url, video_id)
        self._create_tmp_dir(video_id)

        # Extract basic metadata
        metadata = self._download_xml(video_base_url + '/metadata.xml', video_id)
        shapes = self._download_xml(video_base_url + '/shapes.svg', video_id)
        cursor_infos = self._download_xml(video_base_url + '/cursor.xml', video_id)

        # Parse metadata.xml
        meta = metadata.find('./meta')
        start_time = xpath_text(metadata, 'start_time')
        title = xpath_text(meta, 'meetingName')
        # The version is only printed for information. xpath_text yields
        # None when the element is missing, so guard before splitting.
        bbb_origin_version = xpath_text(meta, 'bbb-origin-version')
        if bbb_origin_version is not None:
            self.to_screen("BBB version: " + bbb_origin_version.split(' ')[0])

        # Downloading Slides
        images = shapes.findall(_s("./svg:image[@class='slide']"))
        slides_infos = []
        # The same image may be shown several times; it gets one file name.
        img_path_to_filename = {}
        counter = 0
        for image in images:
            img_path = image.get(_x('xlink:href'))

            image_id = image.get('id')
            image_url = video_base_url + '/' + img_path
            # int('1600.0') raises ValueError; going through float() accepts
            # both integer and float formatted sizes.
            image_width = int(float(image.get('width')))
            image_height = int(float(image.get('height')))
            slide_annotations = shapes.find(_s("./svg:g[@image='{}']".format(image_id)))

            if img_path.endswith('deskshare.png'):
                # The placeholder image stands for the deskshare video.
                image_url = video_base_url + '/deskshare/deskshare.webm'
                slide_filename = 'deskshare.webm'
            elif img_path not in img_path_to_filename:
                slide_filename = 'slide-{:03d}'.format(counter) + '.' + determine_ext(img_path)
                img_path_to_filename[img_path] = slide_filename
                counter += 1
            else:
                slide_filename = img_path_to_filename[img_path]

            slide_path = video_id + '/' + slide_filename
            slide_ts_in = float(image.get('in'))
            slide_ts_out = float(image.get('out'))

            slides_infos.append(
                Slide(
                    image_id,
                    image_url,
                    slide_filename,
                    slide_path,
                    image_width,
                    image_height,
                    slide_ts_in,
                    slide_ts_out,
                    # Never hand a negative duration to the slideshow.
                    max(0, slide_ts_out - slide_ts_in),
                    slide_annotations,
                )
            )

        self.to_screen("Downloading slides")
        self._write_slides(slides_infos, self.ydl)
        if add_annotations:
            slides_infos = self._add_annotations(slides_infos)
        if add_cursor:
            slides_infos = self._add_cursor(slides_infos, cursor_infos)

        # Downloading Webcam / Deskshare
        def download_recording(subdir, stem):
            # Try to download <stem>.webm into the temporary directory.
            # The local path is returned even when the download failed,
            # because the muxing step below has always been handed that path.
            local_path = video_id + '/' + stem + '.webm'
            try:
                self.to_screen("Downloading " + stem + ".webm")
                recording_dl = {
                    'id': video_id,
                    'title': title,
                    'url': video_base_url + '/' + subdir + '/' + stem + '.webm',
                    'timestamp': int(start_time),
                }
                self.ydl.params['outtmpl'] = local_path
                self.ydl.process_ie_result(recording_dl)
            except DownloadError:
                # A missing recording is tolerated, but say so instead of
                # failing silently.
                self.to_screen("Downloading " + stem + ".webm failed")
            return local_path

        webcams_path = download_recording('video', 'webcams')
        # The deskshare path is not needed later in this method.
        download_recording('deskshare', 'deskshare')

        # Post processing
        slideshow_w, slideshow_h = self._rescale_slides(slides_infos)

        slideshow_path = self._create_slideshow(slides_infos, video_id, slideshow_w, slideshow_h)

        # start_time is divided by 1000 before being used as a POSIX timestamp.
        formatted_date = datetime.fromtimestamp(int(start_time) / 1000).strftime('%Y-%m-%dT%H-%M-%S')

        if filename is not None:
            result_path = filename
        else:
            # '/' in the title would otherwise create directories.
            result_path = formatted_date + '_' + title.replace('/', '_') + '.mp4'

        self.to_screen("Mux Slideshow")
        webcam_w, webcam_h = self._get_webcam_size(slideshow_w, slideshow_h)

        # NOTE(review): this early return also skips the temporary-directory
        # cleanup below - confirm that this is intended.
        if os.path.isfile(result_path):
            self.report_warning("Final Slideshow already exists. Abort!")
            return

        if add_webcam:
            self.ffmpeg.mux_slideshow_with_webcam(slideshow_path, webcams_path, webcam_w, webcam_h, result_path)
        else:
            self.ffmpeg.mux_slideshow(slideshow_path, webcams_path, result_path)

        if not keep_tmp_files:
            self.to_screen("Cleanup")
            self._remove_tmp_dir(video_id)