def _get_storyboards_from_spec(self, video_id, sb_spec):
    """Parse a YouTube storyboard spec string into ThumbFramesImage sets.

    The spec is '|'-separated: the first part is a base URL template
    (with $L/$N/$M placeholders), and each following part describes one
    storyboard level as 8 '#'-separated attributes (frame width, frame
    height, total frames, cols, rows, <unused>, filename, sigh token).

    Returns a dict mapping level name ('L0', 'L1', ...) to a list of
    ThumbFramesImage objects, one per mosaic image of that level.
    Malformed levels are skipped with a warning.
    """
    storyboards = dict()
    s_parts = sb_spec.split('|')
    base_url = s_parts[0]
    for i, params in enumerate(s_parts[1:]):
        storyboard_attrib = params.split('#')
        if len(storyboard_attrib) != 8:
            logger.warning(
                'Unable to extract thumbframe from spec {}'.format(params))
            continue

        frame_width = int_or_none(storyboard_attrib[0])
        frame_height = int_or_none(storyboard_attrib[1])
        total_frames = int_or_none(storyboard_attrib[2])
        cols = int_or_none(storyboard_attrib[3])
        rows = int_or_none(storyboard_attrib[4])
        filename = storyboard_attrib[6]
        sigh = storyboard_attrib[7]

        if frame_width and frame_height and cols and rows and total_frames:
            frames = cols * rows
            width, height = frame_width * cols, frame_height * rows
            # number of mosaic images needed to hold all frames
            n_images = int(math.ceil(total_frames / float(cols * rows)))
        else:
            logger.warning(
                'Unable to extract thumbframe from spec {}'.format(params))
            continue

        storyboards_url = base_url.replace('$L', str(i)) + '&'
        storyboard_set = []
        for j in range(n_images):
            url = storyboards_url.replace('$N', filename).replace(
                '$M', str(j)) + 'sigh=' + sigh
            if j == n_images - 1:
                # The last mosaic is usually not full: shrink its grid
                # to the frames actually remaining.
                remaining_frames = total_frames % (cols * rows)
                if remaining_frames != 0:
                    frames = remaining_frames
                    # BUGFIX: rows needed = ceil(remaining / columns).
                    # The old code divided by `rows`, which was only
                    # correct for square grids (cols == rows).
                    rows = int(math.ceil(float(remaining_frames) / cols))
                    height = rows * frame_height
                    if rows == 1:
                        cols = remaining_frames
                        width = cols * frame_width
            storyboard_set.append(
                ThumbFramesImage(url=url, width=width, height=height,
                                 cols=cols, rows=rows, n_frames=frames))
        storyboards['L{}'.format(i)] = storyboard_set
    return storyboards
def _real_extract(self, url):
    """Extract a recorded Zoom meeting: pass the password gate when one
    is present, then scrape the mp4 URL and metadata from the page."""
    display_id = self._match_id(url)
    webpage = self._download_webpage(url, display_id)

    # A password form in the page means the recording is protected;
    # verify the password and fetch the page again afterwards.
    if self._search_regex(
            r'<form[^>]+?id="(password_form)"', webpage, 'password field',
            fatal=False, default=None) is not None:
        self._verify_video_password(url, display_id, webpage)
        webpage = self._download_webpage(url, display_id)

    video_url = self._search_regex(
        r"viewMp4Url: \'(.*)\'", webpage, 'video url')
    title = self._html_search_regex(
        [r"topic: \"(.*)\",", r"<title>(.*) - Zoom</title>"],
        webpage, 'title')
    # "Resolvtions" (sic) matches the misspelling in the page source.
    width = self._search_regex(
        r"viewResolvtionsWidth: (\d*)", webpage, 'res width', fatal=False)
    height = self._search_regex(
        r"viewResolvtionsHeight: (\d*)", webpage, 'res height', fatal=False)
    size = parse_filesize(self._search_regex(
        r"fileSize: \'(.+)\'", webpage, 'fileSize', fatal=False))

    # Referer is built from the same zoom.us host prefix as the input URL.
    referer = url.split("zoom.us")[0] + "zoom.us/"
    formats = [{
        'url': url_or_none(video_url),
        'width': int_or_none(width),
        'height': int_or_none(height),
        'http_headers': {
            'Accept': 'video/webm,video/ogg,video/*;q=0.9,application/ogg;q=0.7,audio/*;q=0.6,*/*;q=0.5',
            'Referer': referer,
        },
        'ext': "mp4",
        'filesize_approx': int_or_none(size),
    }]
    self._sort_formats(formats)

    return {'id': display_id, 'title': title, 'formats': formats}
def _real_extract(self, url):
    """Extract a Canvas (VRT mediazone) video: scrape the asset id from
    the page, fetch the asset JSON and map its targetUrls to formats."""
    video_id = self._match_id(url)
    webpage = self._download_webpage(url, video_id)
    title = self._search_regex(
        r'<h1 class="video__body__header__title">(.+?)</h1>', webpage,
        'title')
    data_video = self._html_search_regex(
        r'data-video=(["\'])(?P<id>.+?)\1', webpage, 'data-video',
        group='id')
    json_url = 'https://mediazone.vrt.be/api/v1/canvas/assets/' + data_video
    data = self._download_json(json_url, video_id)

    formats = []
    for target in data['targetUrls']:
        # BUGFIX: the old condition `'type' and 'url' in target` only
        # tested for 'url' ('type' is a truthy literal), so entries
        # lacking a 'type' key crashed below with a KeyError.
        if 'type' in target and 'url' in target:
            extension = utils.determine_ext(target['url'])
            if target['type'] == 'PROGRESSIVE_DOWNLOAD':
                formats.append({
                    'format_id': extension,
                    'url': target['url'],
                    'protocol': 'http',
                })
            elif target['type'] == 'HLS':
                formats.extend(self._extract_m3u8_formats(
                    target['url'], video_id, entry_protocol='m3u8_native',
                    ext='mp4', preference=0, fatal=False, m3u8_id='hls'))
            elif target['type'] == 'HDS':
                formats.append({
                    'format_id': extension,
                    'url': target['url'],
                    'protocol': 'HDS',
                })
            elif target['type'] == 'RTMP':
                formats.append({
                    'format_id': extension,
                    'url': target['url'],
                    'protocol': 'rtmp',
                })
            elif target['type'] == 'RTSP':
                formats.append({
                    'format_id': extension,
                    'url': target['url'],
                    'protocol': 'rtsp',
                })
    self._sort_formats(formats)

    # BUGFIX: 'duration' may be absent; the old code unconditionally
    # divided None by 1000 (TypeError). Duration is in milliseconds.
    duration = utils.int_or_none(data.get('duration'))
    if duration is not None:
        duration = duration / 1000

    return {
        'id': video_id,
        'title': title,
        'formats': formats,
        'duration': duration,
    }
def extract_formats(self, loader_data):
    """Build a sorted format list from the loader's videoResolutionLevels.

    Each level contributes its vertical resolution and URL; when the URL
    embeds a `<w>x<h>-<fps>p-<kbit>kbit` quality token, width, height,
    fps and tbr are parsed from it as well.
    """
    formats = []
    for level in loader_data["videoResolutionLevels"]:
        fmt = {
            'format_id': '{}p'.format(level['verticalResolution']),
            'height': level['verticalResolution'],
            'url': level['url'],
        }
        match = re.search(
            r'([0-9]{3,4})x([0-9]{3,4})-([0-9]{2})p-([0-9]{3,4})kbit',
            level['url'])
        if match:
            for key, raw in zip(('width', 'height', 'fps', 'tbr'),
                                match.groups()):
                fmt[key] = int_or_none(raw)
        formats.append(fmt)
    self._sort_formats(formats)
    return formats
def _real_extract(self, url):
    """Extract a TikTok video from the mobile web page.

    All metadata comes from the JSON embedded in the page's
    <script id="__NEXT_DATA__"> tag.
    """
    video_id = self._match_id(url)
    webpage = self._download_webpage(
        'https://m.tiktok.com/v/%s.html' % video_id, video_id)

    json_string = self._html_search_regex(
        [r'<script\s+id="__NEXT_DATA__"[^>]*>(.*?)</script>'],
        webpage, 'next_data')
    json_data = self._parse_json(json_string, video_id)
    video_data = try_get(json_data, lambda x: x['props']['pageProps'],
                         expected_type=dict)

    # BUGFIX: check availability before dereferencing videoData below;
    # previously an unavailable video raised an opaque KeyError instead
    # of this ExtractorError.
    if video_data.get('statusCode') != 0:
        raise ExtractorError('Video not available', video_id=video_id)

    # First video URL of the item (watermarked). A watermark-free URL
    # used to be derived by downloading the file and reading the vid
    # embedded in it; that path is disabled, so the watermarked URL is
    # served as-is.
    watermarked_url = video_data['videoData']['itemInfos']['video'][
        'urls'][0]
    watermarkless_url = watermarked_url

    # Extra metadata
    video_info = try_get(
        video_data, lambda x: x['videoData']['itemInfos'], dict)
    author_info = try_get(
        video_data, lambda x: x['videoData']['authorInfos'], dict)
    share_info = try_get(video_data, lambda x: x['shareMeta'], dict)

    unique_id = str_or_none(author_info.get('uniqueId'))
    timestamp = try_get(video_info, lambda x: int(x['createTime']), int)
    height = try_get(
        video_info, lambda x: x['video']['videoMeta']['height'], int)
    width = try_get(
        video_info, lambda x: x['video']['videoMeta']['width'], int)

    thumbnails = [{
        'url': video_info.get('thumbnail') or self._og_search_thumbnail(webpage),
        'width': width,
        'height': height
    }]
    formats = [{
        'url': watermarkless_url,
        'ext': 'mp4',
        'height': height,
        'width': width
    }]

    return {
        'id': video_id,
        'title': self._og_search_title(webpage),
        'description': str_or_none(video_info.get('text')) or str_or_none(share_info.get('desc')),
        'comment_count': int_or_none(video_info.get('commentCount')),
        'duration': try_get(video_info, lambda x: x['video']['videoMeta']['duration'], int),
        'height': height,
        'like_count': int_or_none(video_info.get('diggCount')),
        'repost_count': int_or_none(video_info.get('shareCount')),
        'thumbnail': try_get(video_info, lambda x: x['covers'][0], str),
        'timestamp': timestamp,
        'width': width,
        'creator': str_or_none(author_info.get('nickName')),
        'uploader': unique_id,
        'uploader_id': str_or_none(author_info.get('userId')),
        'uploader_url': 'https://www.tiktok.com/@' + unique_id,
        'thumbnails': thumbnails,
        'webpage_url': self._og_search_url(webpage),
        'ext': 'mp4',
        'formats': formats,
        'http_headers': {
            'User-Agent': 'okhttp',
        }
    }
# Collect every Chromium release tag at or above the minimum version from
# the remote repository, without cloning it.
results = set()
minimum_version = (88, 0, 0, 0)  # mark 88.0.0.0 as minimum version
# https://stackoverflow.com/questions/10649814/get-last-git-tag-from-a-remote-repo-without-cloning
with subprocess.Popen(
    [
        'git', '-c', 'versionsort.suffix=-', 'ls-remote', '--tags',
        '--sort=v:refname',
        'https://chromium.googlesource.com/chromium/src'
    ],
    stdout=subprocess.PIPE,
) as proc:
    for line in proc.stdout:
        # Each ls-remote line is "<commit-hash>\t<tag-ref>".
        commit_hash, tag_ref = line.strip().decode().split('\t')
        tag_name = tag_ref[10:]  # trim first "refs/tags/"
        # Tag name -> numeric tuple for comparison; non-numeric parts dropped.
        version_tuple = tuple(
            int_or_none(x) for x in tag_name.split('.') if x.isdigit())
        if len(version_tuple) < 4:
            continue
        if version_tuple < minimum_version:
            continue
        results.add(tag_name)
# Template of the generated module; "%s" is filled in with the collected
# tag names (the triple-quoted string is closed further down the file).
pycode = '''# coding: utf-8
# AUTOMATICALLY GENERATED FILE. DO NOT EDIT.
# Generated by ./devscripts/make_chrome_version_list.py
# This list is created from git tags in https://chromium.googlesource.com/chromium/src

from __future__ import unicode_literals

versions = [
    "%s"
]
def _parse_mediapackage(self, video):
    """Turn a mediapackage dict into a partial info dict.

    Walks the package's media tracks: adaptive manifests (DASH, HLS,
    HDS, Smooth Streaming, SMIL) are delegated to the dedicated
    manifest extractors, while plain and RTMP tracks are mapped to
    format dicts by hand. Package-level metadata (id, title, series,
    season_id, creator, timestamp, thumbnail) is copied when present.
    """
    tracks = video.get('media', {}).get('track', [])
    video_id = video.get('id')
    formats = []
    for track in tracks:
        href = track['url']
        ext = determine_ext(href, None)
        track_obj = {'url': href}
        transport = track.get('transport')
        if transport == 'DASH' or ext == 'mpd':
            formats.extend(self._extract_mpd_formats(href, video_id, mpd_id='dash', fatal=False))
        elif transport == 'HLS' or ext == 'm3u8':
            formats.extend(
                self._extract_m3u8_formats(href, video_id, m3u8_id='hls', entry_protocol='m3u8_native', fatal=False)
            )
        elif transport == 'HDS' or ext == 'f4m':
            formats.extend(self._extract_f4m_formats(href, video_id, f4m_id='hds', fatal=False))
        elif transport == 'SMOOTH':
            formats.extend(self._extract_ism_formats(href, video_id, ism_id='smooth', fatal=False))
        elif ext == 'smil':
            formats.extend(self._extract_smil_formats(href, video_id, fatal=False))
        else:
            # Non-manifest track: build a single format dict by hand.
            if transport is not None:
                track_obj.update({'format_note': track.get('transport')})
            if transport == 'RTMP':
                # Split rtmp://host/app/playpath into app + play_path;
                # tracks that don't match the pattern are dropped.
                m_obj = re.search(r'^(?:rtmp://[^/]+/(?P<app>[^/]+))/(?P<playpath>.+)$', href)
                if not m_obj:
                    continue
                track_obj.update(
                    {
                        'app': m_obj.group('app'),
                        'play_path': m_obj.group('playpath'),
                        'rtmp_live': True,
                        'preference': -2,
                    }
                )
                # An "ext:rest" prefix in the playpath names the container.
                extention = m_obj.group('playpath').split(':')
                if len(extention) > 1:
                    track_obj.update({'ext': extention[0]})
            audio_info = track.get('audio')
            if audio_info is not None:
                if 'bitrate' in audio_info:
                    # scaled by 1000 — presumably bit/s -> kbit/s; confirm
                    track_obj.update({'abr': int_or_none(audio_info.get('bitrate'), 1000)})
                if 'samplingrate' in audio_info:
                    track_obj.update({'asr': int_or_none(audio_info.get('samplingrate'))})
                audio_encoder = audio_info.get('encoder', {})
                if 'type' in audio_encoder:
                    track_obj.update({'acodec': audio_encoder.get('type')})
            video_info = track.get('video')
            if video_info is not None:
                if 'resolution' in video_info:
                    track_obj.update({'resolution': video_info.get('resolution')})
                    # also derive numeric width/height from the string
                    resolution = parse_resolution(video_info.get('resolution'))
                    track_obj.update(resolution)
                if 'framerate' in video_info:
                    track_obj.update({'fps': int_or_none(video_info.get('framerate'))})
                if 'bitrate' in video_info:
                    # scaled by 1000 — presumably bit/s -> kbit/s; confirm
                    track_obj.update({'vbr': int_or_none(video_info.get('bitrate'), 1000)})
                video_encoder = video_info.get('encoder', {})
                if 'type' in video_encoder:
                    track_obj.update({'vcodec': video_encoder.get('type')})
            formats.append(track_obj)
    self._sort_formats(formats)
    # Assemble the info dict, adding only the metadata actually present.
    result_obj = {'formats': formats}
    if video_id is not None:
        result_obj.update({'id': video_id})
    title = video.get('title')
    if title is not None:
        result_obj.update({'title': title})
    series = video.get('seriestitle')
    if series is not None:
        result_obj.update({'series': series})
    season_id = video.get('series')
    if season_id is not None:
        result_obj.update({'season_id': season_id})
    creator = video.get('creators', {}).get('creator')
    if creator is not None:
        result_obj.update({'creator': creator})
    timestamp = parse_iso8601(video.get('start'))
    if timestamp is not None:
        result_obj.update({'timestamp': timestamp})
    # Use the first attachment (if any) as the thumbnail.
    attachments = video.get('attachments', {}).get('attachment', [])
    if len(attachments) > 0:
        thumbnail = attachments[0].get('url')
        result_obj.update({'thumbnail': thumbnail})
    return result_obj