def test_xpath_text(self):
    """Check xpath_text for a matching path, a missing path and a fatal miss."""
    document = xml.etree.ElementTree.fromstring(
        '''<root> <div> <p>Foo</p> </div> </root>''')

    # A path that exists yields the text of the matched element.
    found = xpath_text(document, 'div/p')
    self.assertEqual(found, 'Foo')

    # A path that does not exist yields None when the lookup is not fatal.
    missing = xpath_text(document, 'div/bar')
    self.assertTrue(missing is None)

    # The same miss must raise once fatal=True is requested.
    with self.assertRaises(ExtractorError):
        xpath_text(document, 'div/bar', fatal=True)
def test_xpath_text(self):
    """Check xpath_text for a match, a default fallback, a miss and a fatal miss."""
    tree = compat_etree_fromstring(
        """<root> <div> <p>Foo</p> </div> </root>""")
    absent_path = "div/bar"

    # A path that exists yields the text of the matched element.
    self.assertEqual(xpath_text(tree, "div/p"), "Foo")

    # A missing path falls back to the supplied default ...
    fallback = xpath_text(tree, absent_path, default="default")
    self.assertEqual(fallback, "default")

    # ... or to None when no default is given ...
    unmatched = xpath_text(tree, absent_path)
    self.assertTrue(unmatched is None)

    # ... and raises when the lookup is marked as fatal.
    with self.assertRaises(ExtractorError):
        xpath_text(tree, absent_path, fatal=True)
def run(self, dl_url: str, add_webcam: bool, add_annotations: bool,
        add_cursor, keep_tmp_files: bool, filename: str):
    """Download a recorded presentation and mux it into one video file.

    Steps, in order: fetch ``metadata.xml``, ``shapes.svg`` and
    ``cursor.xml``; download every image referenced by ``shapes.svg``;
    download the webcam and deskshare videos (``.webm`` first, ``.mp4`` as
    fallback); build the slideshow; mux it into the result file.

    Args:
        dl_url: URL of the recording. Must match ``self._VALID_URL`` and
            provide the named groups ``id`` and ``website``.
        add_webcam: If true, mux with ``mux_slideshow_with_webcam``,
            otherwise with ``mux_slideshow``.
        add_annotations: If true, pass the slides through
            ``self._add_annotations``.
        add_cursor: If truthy, pass the slides through ``self._add_cursor``
            together with the parsed ``cursor.xml``.
        keep_tmp_files: If false, the temporary directory is removed after
            muxing.
        filename: Path of the result file. If ``None`` the name is derived
            from the recording start time and the meeting name.

    Raises:
        ValueError: If ``dl_url`` does not match ``self._VALID_URL``.

    Returns early, without muxing or cleanup, when the result file already
    exists.
    """
    m_obj = re.match(self._VALID_URL, dl_url)
    if m_obj is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError('Unsupported recording URL: {!r}'.format(dl_url))
    video_id = m_obj.group('id')
    video_website = m_obj.group('website')
    video_base_url = video_website + '/presentation/' + video_id

    self.to_screen("Downloading meta informations")
    # Make sure the lesson exists
    self._download_webpage(dl_url, video_id)
    self._create_tmp_dir(video_id)

    # Extract basic metadata
    metadata = self._download_xml(video_base_url + '/metadata.xml', video_id)
    shapes = self._download_xml(video_base_url + '/shapes.svg', video_id)
    cursor_infos = self._download_xml(video_base_url + '/cursor.xml', video_id)

    # Parse metadata.xml
    meta = metadata.find('./meta')
    start_time = xpath_text(metadata, 'start_time')
    # The playback duration is divided by 1000 to get seconds.
    recording_duration = float(
        xpath_text(metadata, './playback/duration')) / 1000.0
    title = xpath_text(meta, 'meetingName')

    # The version is only logged, so a failure here must not abort the
    # download; it is reported as a warning instead of being swallowed.
    try:
        bbb_origin_version = xpath_text(meta, 'bbb-origin-version')
        if bbb_origin_version is not None:
            self.to_screen(
                "BBB version: " + bbb_origin_version.split(' ')[0])
    except Exception as err:
        self.report_warning(
            "Could not determine BBB version: {}".format(err))

    # Downloading Slides
    # xml_find_rec fills `images` in place (presumably a recursive search
    # for all svg:image elements - confirm against its definition). Images
    # whose class is not 'slide' are kept as bonus images below.
    images = []
    self.xml_find_rec(shapes, _s('svg:image'), images)
    href_attr = _x('xlink:href')

    slides_infos = []
    bonus_images = []
    img_path_to_filename = {}
    counter = 0
    for image in images:
        img_path = image.get(href_attr)
        image_url = video_base_url + '/' + img_path
        # Sizes are parsed via float first so fractional values are accepted.
        image_width = int(float(image.get('width')))
        image_height = int(float(image.get('height')))

        if image.get('class') != 'slide':
            image_filename = image_url.split('/')[-1]
            bonus_images.append(
                BonusImage(
                    image_url,
                    image_filename,
                    video_id + '/' + image_filename,
                    image_width,
                    image_height,
                ))
            continue

        image_id = image.get('id')
        slide_annotations = shapes.find(
            _s("./svg:g[@image='{}']".format(image_id)))

        if img_path.endswith('deskshare.png'):
            # The deskshare placeholder image stands for the deskshare video.
            image_url = video_base_url + '/deskshare/deskshare.webm'
            slide_filename = 'deskshare.webm'
        elif img_path in img_path_to_filename:
            # Same image shown again: reuse the file name already assigned.
            slide_filename = img_path_to_filename[img_path]
        else:
            slide_filename = 'slide-{:03d}'.format(
                counter) + '.' + determine_ext(img_path)
            img_path_to_filename[img_path] = slide_filename
            counter += 1

        slide_ts_in = float(image.get('in'))
        slide_ts_out = float(image.get('out'))
        # Never negative, and never past the end of the recording.
        slide_ts_duration = max(
            0.0,
            min(recording_duration - slide_ts_in,
                slide_ts_out - slide_ts_in))

        slides_infos.append(
            Slide(
                image_id,
                image_url,
                slide_filename,
                video_id + '/' + slide_filename,
                image_width,
                image_height,
                slide_ts_in,
                slide_ts_out,
                slide_ts_duration,
                slide_annotations,
            ))

    # We now change the xml tree, all hrefs of all images now point to
    # local files
    for image in images:
        image.attrib[href_attr] = (
            video_id + '/' + image.attrib[href_attr].split('/')[-1])

    self.to_screen("Downloading slides")
    self._write_slides(slides_infos, self.ydl)
    self._write_slides(bonus_images, self.ydl)
    if add_annotations:
        slides_infos = self._add_annotations(slides_infos)
    if add_cursor:
        slides_infos = self._add_cursor(slides_infos, cursor_infos)

    def download_video(remote_dir, stem, give_up_message):
        """Fetch <stem>.webm, then <stem>.mp4; return the local path or None."""
        attempts = (
            ('webm',
             "Downloading {0}.webm failed! "
             "Downloading {0}.mp4 instead".format(stem)),
            ('mp4', give_up_message),
        )
        self.to_screen("Downloading {}.webm".format(stem))
        for ext, failure_message in attempts:
            local_path = '{}/{}.{}'.format(video_id, stem, ext)
            try:
                ie_result = {
                    'id': video_id,
                    'title': title,
                    'url': '{}/{}/{}.{}'.format(
                        video_base_url, remote_dir, stem, ext),
                    'timestamp': int(start_time),
                }
                self.ydl.params['outtmpl'] = local_path
                self.ydl.process_ie_result(ie_result)
            except DownloadError:
                self.to_screen(failure_message)
            else:
                return local_path
        return None

    # Downloading Webcam / Deskshare
    # Unless verbose, the downloader's stderr output is replaced by
    # dummy_to_stderr while the videos are fetched. The original method is
    # restored in `finally` so an unexpected error cannot leave it patched.
    silence_ydl = not self.verbose
    if silence_ydl:
        self.ydl.to_stderr_backup = self.ydl.to_stderr
        self.ydl.to_stderr = types.MethodType(dummy_to_stderr, self.ydl)
    try:
        webcams_path = download_video(
            'video', 'webcams',
            "Error: Downloading webcams.mp4 failed!")
        download_video(
            'deskshare', 'deskshare',
            "Warning: Downloading deskshare.mp4 failed - "
            "No desk was likely shared in this session.")
    finally:
        if silence_ydl:
            self.ydl.to_stderr = self.ydl.to_stderr_backup

    # Post processing
    slideshow_w, slideshow_h = self._rescale_slides(slides_infos)
    slideshow_path = self._create_slideshow(slides_infos, video_id,
                                            slideshow_w, slideshow_h)

    if filename is not None:
        result_path = filename
    else:
        # start_time is divided by 1000 before being used as a timestamp.
        formatted_date = datetime.fromtimestamp(
            int(start_time) / 1000).strftime('%Y-%m-%dT%H-%M-%S')
        # '/' would be read as a path separator in the file name.
        result_path = formatted_date + '_' + title.replace('/', '_') + '.mp4'

    self.to_screen("Mux Slideshow")
    webcam_w, webcam_h = self._get_webcam_size(slideshow_w, slideshow_h)

    if os.path.isfile(result_path):
        self.report_warning("Final Slideshow already exists. Abort!")
        return

    # NOTE(review): webcams_path is None when both webcam downloads failed;
    # confirm that the ffmpeg wrappers below cope with that.
    if add_webcam:
        self.ffmpeg.mux_slideshow_with_webcam(slideshow_path, webcams_path,
                                              webcam_w, webcam_h,
                                              result_path)
    else:
        self.ffmpeg.mux_slideshow(slideshow_path, webcams_path, result_path)

    if not keep_tmp_files:
        self.to_screen("Cleanup")
        self._remove_tmp_dir(video_id)
def run(self, dl_url: str, add_webcam: bool, add_annotations: bool,
        add_cursor, keep_tmp_files: bool, filename: str):
    """Download a recorded presentation and mux it into one video file.

    Steps, in order: fetch ``metadata.xml``, ``shapes.svg`` and
    ``cursor.xml``; download every slide image listed in ``shapes.svg``;
    download ``webcams.webm`` and ``deskshare.webm``; build the slideshow;
    mux it into the result file.

    Args:
        dl_url: URL of the recording. Must match ``self._VALID_URL_RE`` and
            provide the named groups ``id`` and ``website``.
        add_webcam: If true, mux with ``mux_slideshow_with_webcam``,
            otherwise with ``mux_slideshow``.
        add_annotations: If true, pass the slides through
            ``self._add_annotations``.
        add_cursor: If truthy, pass the slides through ``self._add_cursor``
            together with the parsed ``cursor.xml``.
        keep_tmp_files: If false, the temporary directory is removed after
            muxing.
        filename: Path of the result file. If ``None`` the name is derived
            from the recording start time and the meeting name.

    Raises:
        ValueError: If ``dl_url`` does not match ``self._VALID_URL_RE``.

    Returns early, without muxing or cleanup, when the result file already
    exists.
    """
    m_obj = self._VALID_URL_RE.match(dl_url)
    if m_obj is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError('Unsupported recording URL: {!r}'.format(dl_url))
    video_id = m_obj.group('id')
    video_website = m_obj.group('website')
    video_base_url = video_website + '/presentation/' + video_id

    self.to_screen("Downloading meta informations")
    # Make sure the lesson exists
    self._download_webpage(dl_url, video_id)
    self._create_tmp_dir(video_id)

    # Extract basic metadata
    metadata = self._download_xml(video_base_url + '/metadata.xml', video_id)
    shapes = self._download_xml(video_base_url + '/shapes.svg', video_id)
    cursor_infos = self._download_xml(video_base_url + '/cursor.xml', video_id)

    # Parse metadata.xml
    meta = metadata.find('./meta')
    start_time = xpath_text(metadata, 'start_time')
    title = xpath_text(meta, 'meetingName')
    # The version tag is only logged; skip the log line when it is absent
    # instead of failing on None.
    bbb_origin_version = xpath_text(meta, 'bbb-origin-version')
    if bbb_origin_version is not None:
        self.to_screen("BBB version: " + bbb_origin_version.split(' ')[0])

    # Downloading Slides
    images = shapes.findall(_s("./svg:image[@class='slide']"))
    href_attr = _x('xlink:href')
    slides_infos = []
    img_path_to_filename = {}
    counter = 0
    for image in images:
        img_path = image.get(href_attr)
        image_id = image.get('id')
        image_url = video_base_url + '/' + img_path
        # Parsed via float first so fractional sizes such as '1280.0' are
        # accepted as well as plain integers.
        image_width = int(float(image.get('width')))
        image_height = int(float(image.get('height')))
        slide_annotations = shapes.find(
            _s("./svg:g[@image='{}']".format(image_id)))

        if img_path.endswith('deskshare.png'):
            # The deskshare placeholder image stands for the deskshare video.
            image_url = video_base_url + '/deskshare/deskshare.webm'
            slide_filename = 'deskshare.webm'
        elif img_path in img_path_to_filename:
            # Same image shown again: reuse the file name already assigned.
            slide_filename = img_path_to_filename[img_path]
        else:
            slide_filename = 'slide-{:03d}'.format(
                counter) + '.' + determine_ext(img_path)
            img_path_to_filename[img_path] = slide_filename
            counter += 1

        slide_ts_in = float(image.get('in'))
        slide_ts_out = float(image.get('out'))

        slides_infos.append(
            Slide(
                image_id,
                image_url,
                slide_filename,
                video_id + '/' + slide_filename,
                image_width,
                image_height,
                slide_ts_in,
                slide_ts_out,
                max(0, slide_ts_out - slide_ts_in),
                slide_annotations,
            ))

    self.to_screen("Downloading slides")
    self._write_slides(slides_infos, self.ydl)
    if add_annotations:
        slides_infos = self._add_annotations(slides_infos)
    if add_cursor:
        slides_infos = self._add_cursor(slides_infos, cursor_infos)

    def download_webm(remote_dir, stem):
        """Try to fetch <stem>.webm into the temp dir; return its local path."""
        local_path = video_id + '/' + stem + '.webm'
        try:
            self.to_screen("Downloading " + stem + ".webm")
            ie_result = {
                'id': video_id,
                'title': title,
                'url': video_base_url + '/' + remote_dir + '/' + stem + '.webm',
                'timestamp': int(start_time),
            }
            self.ydl.params['outtmpl'] = local_path
            self.ydl.process_ie_result(ie_result)
        except DownloadError:
            # A failed download is not fatal, but it is reported rather
            # than silently ignored.
            self.to_screen("Downloading " + stem + ".webm failed!")
        return local_path

    # Downloading Webcam / Deskshare
    # NOTE(review): the local path is returned even when the download
    # failed, so webcams_path may name a file that does not exist; confirm
    # that the ffmpeg wrappers below cope with that.
    webcams_path = download_webm('video', 'webcams')
    download_webm('deskshare', 'deskshare')

    # Post processing
    slideshow_w, slideshow_h = self._rescale_slides(slides_infos)
    slideshow_path = self._create_slideshow(slides_infos, video_id,
                                            slideshow_w, slideshow_h)

    if filename is not None:
        result_path = filename
    else:
        # start_time is divided by 1000 before being used as a timestamp.
        formatted_date = datetime.fromtimestamp(
            int(start_time) / 1000).strftime('%Y-%m-%dT%H-%M-%S')
        # '/' would be read as a path separator in the file name.
        result_path = formatted_date + '_' + title.replace('/', '_') + '.mp4'

    self.to_screen("Mux Slideshow")
    webcam_w, webcam_h = self._get_webcam_size(slideshow_w, slideshow_h)

    if os.path.isfile(result_path):
        self.report_warning("Final Slideshow already exists. Abort!")
        return

    if add_webcam:
        self.ffmpeg.mux_slideshow_with_webcam(slideshow_path, webcams_path,
                                              webcam_w, webcam_h,
                                              result_path)
    else:
        self.ffmpeg.mux_slideshow(slideshow_path, webcams_path, result_path)

    if not keep_tmp_files:
        self.to_screen("Cleanup")
        self._remove_tmp_dir(video_id)