def getVideosApiPlayList(self, url, category, page, cItem):
    """Fetch a playlist through YouTube's JSON list_ajax endpoint and map
    every entry onto the internal item dict used by the listing views."""
    printDBG('YouTubeParser.getVideosApiPlayList url[%s]' % url)
    playlistID = self.cm.ph.getSearchGroups(url + '&', 'list=([^&]+?)&')[0]
    baseUrl = 'https://www.youtube.com/list_ajax?style=json&action_get_list=1&list=%s' % playlistID
    itemsOut = []
    if '' == baseUrl:
        return itemsOut
    sts, data = self.cm.getPage(baseUrl, {'host': self.HOST})
    try:
        for entry in json_loads(data)['video']:
            duration = entry['length_seconds']
            if '' != duration:
                duration = str(timedelta(seconds=int(duration)))
            if duration.startswith("0:"):
                # drop the leading zero-hours field ("0:12:34" -> "12:34")
                duration = duration[2:]
            itemsOut.append({
                'type': 'video',
                'category': 'video',
                'title': entry['title'],
                'url': 'http://www.youtube.com/watch?v=' + entry['encrypted_id'],
                'icon': entry['thumbnail'],
                'time': duration,
                'desc': entry['description']
            })
    except Exception:
        # page fetch failure or malformed JSON ends up here; list stays partial/empty
        printExc()
    return itemsOut
def isDashAllowed():
    """Tell whether DASH playback may be offered, based on the ytShowDash
    setting ("true"/"auto"/other) and the available external binaries."""
    value = config.plugins.iptvplayer.ytShowDash.value
    printDBG("ALLOW DASH: >> %s" % value)
    if value == "true":
        # explicit opt-in still requires ffmpeg for the merge step
        return IsExecutable('ffmpeg')
    if value == "auto":
        # automatic mode additionally needs the configured exteplayer3 binary
        return IsExecutable('ffmpeg') and IsExecutable(
            config.plugins.iptvplayer.exteplayer3path.value)
    return False
def getSearchResult(self, pattern, searchType, page, nextPageCategory, sortBy=''):
    """Scrape one page of YouTube search results.

    pattern          -- url-encoded search phrase
    searchType       -- YouTube 'filters' value (e.g. 'video', 'playlist')
    page             -- 1-based page number as a string
    nextPageCategory -- category assigned to the generated "Next page" item
    sortBy           -- optional 'search_sort' value

    Returns a list of item dicts (via parseListBase), with a trailing
    "Next page" item appended when the pagination box shows a next page.
    Returns [] on any exception.
    """
    printDBG(
        'YouTubeParser.getSearchResult pattern[%s], searchType[%s], page[%s]'
        % (pattern, searchType, page))
    currList = []
    try:
        url = 'http://www.youtube.com/results?search_query=%s&filters=%s&search_sort=%s&page=%s' % (
            pattern, searchType, sortBy, page)
        printD(url)  # NOTE(review): printD/printE look like project debug helpers — confirm they exist alongside printDBG
        sts, data = self.cm.getPage(url, {'host': self.HOST})
        if sts:
            # 'page-box' holds the pagination links; a next page exists when
            # the literal ">N<" for page+1 appears inside it
            nextPage = self.cm.ph.getDataBeetwenMarkers(
                data, 'page-box', '</div>', False)[1]
            if nextPage.find('>%d<' % (int(page) + 1)) > -1:
                nextPage = True
            else:
                nextPage = False
            # each result item starts with this lockup marker
            sp = '<li><div class="yt-lockup'
            # the results list is terminated differently for playlist searches
            if searchType == 'playlist':
                m2 = '<div class="branded-page-box'
            else:
                m2 = '</ol>'
            data = CParsingHelper.getDataBeetwenMarkers(data, sp, m2, False)[1]
            data = data.split(sp)
            currList = self.parseListBase(data, searchType)
            print "nextPage", nextPage
            if len(currList) and nextPage:
                item = {
                    'name': 'history',
                    'type': 'category',
                    'category': nextPageCategory,
                    'pattern': pattern,
                    'search_type': searchType,
                    'title': _("Next page"),
                    'page': str(int(page) + 1)
                }
                currList.append(item)
    except Exception as error:
        printE()
        printD(str(error))
        return []
    return currList
def getVideosFromTraylist(self, url, category, page, cItem):
    """List the videos of a watch-page tray (sidebar) playlist.

    Listing is fully delegated to getVideosApiPlayList (the list_ajax API
    variant).  The old HTML-scraping implementation that used to follow the
    return statement was unreachable dead code and has been removed.
    """
    printDBG('YouTubeParser.getVideosFromTraylist')
    return self.getVideosApiPlayList(url, category, page, cItem)
def getListPlaylistsItems(self, url, category, page, cItem):
    """List the playlists shown on a channel 'playlists' page.

    Page 1 is a plain HTML document; later pages are JSON responses whose
    'content_html' / 'load_more_widget_html' fields carry the markup.
    Appends a "Next page" item when a load-more href is present.
    """
    printDBG('YouTubeParser.getListPlaylistsItems page[%s]' % (page))
    currList = []
    try:
        sts, data = self.cm.getPage(url, {'host': self.HOST})
        if sts:
            #self.cm.ph.writeToFile('/mnt/new2/yt.html', data)
            if '1' == page:
                sts, data = CParsingHelper.getDataBeetwenMarkers(
                    data, '<div class="yt-lockup clearfix',
                    'footer-container')
            else:
                data = json_loads(data)
                data = data['load_more_widget_html'] + '\n' + data[
                    'content_html']
            # next page: the href lives in an HTML attribute, so '&' is
            # escaped as '&amp;' and must be unescaped (the previous
            # replace('&', '&') was a no-op)
            match = re.search('data-uix-load-more-href="([^"]+?)"', data)
            if not match:
                nextPage = ""
            else:
                nextPage = match.group(1).replace('&amp;', '&')
            itemsTab = data.split('<div class="yt-lockup clearfix')
            printDBG(itemsTab[0])
            currList = self.parseListBase(itemsTab, 'playlist')
            if '' != nextPage:
                item = dict(cItem)
                # localized label for consistency with the other listing
                # methods (was a hard-coded Polish string)
                item.update({
                    'title': _("Next page"),
                    'page': str(int(page) + 1),
                    'url': 'http://www.youtube.com' + nextPage
                })
                currList.append(item)
    except Exception:
        printExc()
    return currList
def getVideosFromChannelList(self, url, category, page, cItem):
    """List the uploads of a channel (its 'videos' feed), with paging.

    Page 1 is a full HTML document; later pages are JSON with the item
    markup inside 'content_html' / 'load_more_widget_html'.
    Returns [] on any exception.
    """
    printDBG('YouTubeParser.getVideosFromChannelList page[%s]' % (page))
    currList = []
    try:
        sts, data = self.cm.getPage(url, {'host': self.HOST})
        if sts:
            if '1' == str(page):
                sts, data = CParsingHelper.getDataBeetwenMarkers(
                    data, 'feed-item-container', 'footer-container', False)
            else:
                # debug dump through the project logger instead of a raw
                # print of the whole response to stdout
                printDBG(data)
                data = json.loads(data)
                data = data['load_more_widget_html'] + '\n' + data[
                    'content_html']
            # next page: href is HTML-attribute content, so unescape
            # '&amp;' (the previous replace('&', '&') was a no-op)
            match = re.search('data-uix-load-more-href="([^"]+?)"', data)
            if not match:
                nextPage = ""
            else:
                nextPage = match.group(1).replace('&amp;', '&')
            data = data.split('feed-item-container')
            currList = self.parseListBase(data)
            if '' != nextPage:
                item = dict(cItem)
                item.update({
                    'title': _("Next page"),
                    'page': str(int(page) + 1),
                    'url': 'http://www.youtube.com' + nextPage
                })
                currList.append(item)
    except Exception:
        printExc()
        return []
    return currList
def parserGOOGLE(self, baseUrl):
    """Extract direct MP4 links for a Google Docs/Drive hosted video.

    Returns a list of (name, url) string tuples sorted by quality
    (highest first), or False when the page fetch fails.  The returned
    urls carry User-agent/Cookie/Referer hints in the fragment part.
    """
    printDBG("parserGOOGLE baseUrl[%s]" % baseUrl)
    videoTab = []
    _VALID_URL = r'https?://(?:(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/)|video\.google\.com/get_player\?.*?docid=)(?P<id>[a-zA-Z0-9_-]{28,})'
    mobj = re.match(_VALID_URL, baseUrl)
    try:
        # normalize any supported Google url form to the docs file page
        video_id = mobj.group('id')
        linkUrl = 'http://docs.google.com/file/d/' + video_id
    except Exception:
        # no id matched (mobj is None) -- fall back to the url as given
        linkUrl = baseUrl
    # itag -> container extension map; only mp4 itags are kept below
    _FORMATS_EXT = {
        '5': 'flv',
        '6': 'flv',
        '13': '3gp',
        '17': '3gp',
        '18': 'mp4',
        '22': 'mp4',
        '34': 'flv',
        '35': 'flv',
        '36': '3gp',
        '37': 'mp4',
        '38': 'mp4',
        '43': 'webm',
        '44': 'webm',
        '45': 'webm',
        '46': 'webm',
        '59': 'mp4',
    }
    HTTP_HEADER = self.cm.getDefaultHeader(browser='chrome')
    HTTP_HEADER['Referer'] = linkUrl
    # cookies must be captured and replayed: the stream urls are only valid
    # together with the session cookies from the page fetch
    COOKIE_FILE = GetCookieDir('google.cookie')
    print 'COOKIE_FILE', COOKIE_FILE
    defaultParams = {
        'header': HTTP_HEADER,
        'use_cookie': True,
        'load_cookie': False,
        'save_cookie': True,
        'cookiefile': COOKIE_FILE
    }
    sts, data = self.cm.getPage(linkUrl, defaultParams)
    if not sts:
        return False
    cookieHeader = self.cm.getCookieHeader(COOKIE_FILE)
    # "fmt_list" maps itag -> "WxH" resolution
    fmtDict = {}
    fmtList = self.cm.ph.getSearchGroups(data, '"fmt_list"[:,]"([^"]+?)"')[0]
    fmtList = fmtList.split(',')
    for item in fmtList:
        item = self.cm.ph.getSearchGroups(item,
                                          '([0-9]+?)/([0-9]+?x[0-9]+?)/', 2)
        if item[0] != '' and item[1] != '':
            fmtDict[item[0]] = item[1]
    # "fmt_stream_map" is "itag|url,itag|url,..."
    data = self.cm.ph.getSearchGroups(data,
                                      '"fmt_stream_map"[:,]"([^"]+?)"')[0]
    data = data.split(',')
    for item in data:
        item = item.split('|')
        printDBG(">> type[%s]" % item[0])
        if 'mp4' in _FORMATS_EXT.get(item[0], ''):
            try:
                # numeric height taken from the "WxH" string, for sorting
                quality = int(fmtDict.get(item[0], '').split('x', 1)[-1])
            except Exception:
                quality = 0
            videoTab.append({
                'name':
                'drive.google.com: %s' % fmtDict.get(item[0], '').split(
                    'x', 1)[-1] + 'p',
                'quality':
                quality,
                'url':
                strwithmeta(
                    unicode_escape(item[1]), {
                        'Cookie': cookieHeader,
                        'Referer': 'https://youtube.googleapis.com/',
                        'User-Agent': HTTP_HEADER['User-Agent']
                    })
            })
    videoTab.sort(key=lambda item: item['quality'], reverse=True)
    print "cookieHeader", cookieHeader
    # flatten to (name, url) tuples; playback hints ride in the fragment
    list1 = []
    for item in videoTab:
        url = item['url'] + "#User-agent=%s&Cookie=%s&Referer=%s" % (
            HTTP_HEADER['User-Agent'], cookieHeader,
            'https://youtube.googleapis.com/')
        name = item['name']
        list1.append((str(name), str(url)))
    return list1
def parseListBase(self, data, type='video'):
    """Turn a list of scraped HTML fragments into item dicts.

    data -- list of HTML snippets, one per candidate item (first element
            is usually the leading junk before the first item marker)
    type -- one of the urlPatterns keys below; selects how the item url
            is extracted and which 'type'/'category' the result gets

    Items missing a title, url or thumbnail are dropped.
    """
    printDBG("parseListBase----------------")
    # per-type: [item type, url-extraction regex, url prefix to prepend]
    urlPatterns = {
        'video': ['video', 'href="[ ]*?(/watch\?v=[^"]+?)"', ''],
        'channel': ['category', 'href="(/[^"]+?)"', ''],
        'playlist': ['category', 'list=([^"]+?)"', '/playlist?list='],
        'movie': ['video', 'data-context-item-id="([^"]+?)"', '/watch?v='],
        'live': ['video', 'href="(/watch\?v=[^"]+?)"', ''],
        'tray': ['video', 'data-video-id="([^"]+?)"', '/watch?v='],
    }
    currList = []
    for i in range(len(data)):
        #printDBG("++++++++++++++++++++++++++++++++++++++++++++++++++++++")
        # get requaired params
        url = urlPatterns[type][2] + self.getAttributes(
            urlPatterns[type][1], data[i])
        print "url1", url
        # get title -- try the markup variants from newest to oldest layout
        title = ''  #self.getAttributes('title="([^"]+?)"', data[i])
        if '' == title:
            title = self.getAttributes(
                'data-context-item-title="([^"]+?)"', data[i])
        if '' == title:
            title = self.getAttributes('data-video-title="([^"]+?)"',
                                       data[i])
        if '' == title:
            sts, title = CParsingHelper.getDataBeetwenMarkers(
                data[i], '<h3 class="yt-lockup-title">', '</h3>', False)
        if '' == title:
            sts, title = CParsingHelper.getDataBeetwenReMarkers(
                data[i], re.compile('<span [^>]*?class="title[^>]*?>'),
                re.compile('</span>'), False)
        if '' == title:
            sts, title = CParsingHelper.getDataBeetwenReMarkers(
                data[i], re.compile('class="pl-video-title-link[^>]*?>'),
                re.compile('<'), False)
        if '' == title:
            # last resort: locate the opening yt-lockup-title tag and read
            # up to its matching close tag (tag name taken from the marker)
            titleMarker = self.cm.ph.getSearchGroups(
                data[i], '(<[^">]+?"yt-lockup-title[^"]*?"[^>]*?>)')[0]
            if '' != titleMarker:
                tidx = titleMarker.find(' ')
                if tidx > 0:
                    tmarker = titleMarker[1:tidx]
                    title = self.cm.ph.getDataBeetwenMarkers(
                        data[i], titleMarker, '</%s>' % tmarker)[1]
        if '' != title:
            #title=self.cleanhtml(title)
            title = clean_html(title)
        if i == 0:
            printDBG(data[i])
        # thumbnail: prefer explicit data-thumb jpg, fall back to any <img>
        img = self.getAttributes('data-thumb="([^"]+?\.jpg[^"]*?)"',
                                 data[i])
        if '' == img:
            img = self.getAttributes('src="([^"]+?\.jpg[^"]*?)"', data[i])
        if '' == img:
            img = self.getAttributes('<img[^>]+?data\-thumb="([^"]+?)"',
                                     data[i])
        if '' == img:
            img = self.getAttributes('<img[^>]+?src="([^"]+?)"', data[i])
        if '.gif' in img:
            # animated placeholders are not usable thumbnails
            img = ''
        # duration: again, several markup generations
        time = self.getAttributes('data-context-item-time="([^"]+?)"',
                                  data[i])
        if '' == time:
            time = self.getAttributes('class="video-time">([^<]+?)</span>',
                                      data[i])
        if '' == time:
            sts, time = CParsingHelper.getDataBeetwenReMarkers(
                data[i], re.compile('pl-video-time"[^>]*?>'),
                re.compile('<'), False)
        if '' == time:
            sts, time = CParsingHelper.getDataBeetwenReMarkers(
                data[i], re.compile('timestamp"[^>]*?>'), re.compile('<'),
                False)
        time = time.strip()
        print "img", img
        print "time", time
        # desc -- collect meta line, video count and description variants
        descTab = []
        desc = self.cm.ph.getDataBeetwenMarkers(
            data[i], '<div class="yt-lockup-meta', '</div>')[1]
        if desc != '':
            descTab.append(desc)
        desc = self.cm.ph.getDataBeetwenMarkers(
            data[i], '<span class="formatted-video-count', '</span>')[1]
        if desc != '':
            descTab.append(desc)
        desc = self.cm.ph.getDataBeetwenReMarkers(
            data[i], re.compile('class="video-description[^>]+?>'),
            re.compile('</p>'), False)[1]
        if '' == desc:
            desc = self.cm.ph.getDataBeetwenReMarkers(
                data[i],
                re.compile('class="yt-lockup-description[^>]+?>'),
                re.compile('</div>'), False)[1]
        if desc != '':
            descTab.append(desc)
        newDescTab = []
        for desc in descTab:
            desc = clean_html(desc)
            if desc != '':
                newDescTab.append(desc)
        print "newDescTab", newDescTab
        # strip trailing ';...' junk from the extracted url
        urlTmp = url.split(';')
        if len(urlTmp) > 0:
            url = urlTmp[0]
        if type == 'video':
            # keep only the v= parameter for plain video urls
            url = url.split('&')[0]
        #printDBG("#####################################")
        #printDBG('url [%s] ' % url)
        #printDBG('title [%s] ' % title)
        #printDBG('img [%s] ' % img)
        #printDBG('time [%s] ' % time)
        #printDBG('desc [%s] ' % desc)
        if title != '' and url != '' and img != '':
            # absolutize scheme-relative and site-relative urls
            correctUrlTab = [url, img]
            for i in range(len(correctUrlTab)):
                if not correctUrlTab[i].startswith(
                        'http:') and not correctUrlTab[i].startswith(
                            'https:'):
                    if correctUrlTab[i].startswith("//"):
                        correctUrlTab[i] = 'http:' + correctUrlTab[i]
                    else:
                        correctUrlTab[
                            i] = 'http://www.youtube.com' + correctUrlTab[i]
                #else:
                #    if correctUrlTab[i].startswith('https:'):
                #        correctUrlTab[i] = "http:" + correctUrlTab[i][6:]
            title = clean_html(title)
            # NOTE(review): replace('&', '&') below is a no-op — it looks
            # like it was meant to unescape '&amp;' in the icon url; confirm
            # against the upstream project history before changing.
            params = {
                'type': urlPatterns[type][0],
                'category': type,
                'title': title,
                'url': correctUrlTab[0],
                'icon': correctUrlTab[1].replace('&', '&'),
                'time': time,
                'desc': '[/br]'.join(newDescTab)
            }
            currList.append(params)
    print "currList", currList
    return currList
def getDirectLinks(self, url, formats='flv, mp4', dash=True, dashSepareteList=False, allowVP9=None):
    """Resolve a YouTube watch url to playable stream links.

    url              -- watch url, or a '/channel/...*/live' url (resolved to
                        the live video id first)
    formats          -- comma list of accepted container extensions
    dash             -- also collect DASH (mpd / merged audio+video) links
    dashSepareteList -- when True return (retList, dashList) instead of one
                        combined list
    Returns [] (or ([], []) in separate-list mode) on extraction failure.
    """
    printDBG("YouTubeParser.getDirectLinks")
    linksTab = []  # renamed from 'list' to avoid shadowing the builtin
    try:
        if self.cm.isValidUrl(url) and '/channel/' in url and url.endswith(
                '/live'):
            # resolve a channel /live page to its current video id
            sts, data = self.cm.getPage(url)
            if sts:
                videoId = self.cm.ph.getSearchGroups(
                    data,
                    '''<meta[^>]+?itemprop=['"]videoId['"][^>]+?content=['"]([^'^"]+?)['"]'''
                )[0]
                if videoId == '':
                    videoId = self.cm.ph.getSearchGroups(
                        data,
                        '''['"]REDIRECT_TO_VIDEO['"]\s*\,\s*['"]([^'^"]+?)['"]'''
                    )[0]
                if videoId != '':
                    url = 'https://www.youtube.com/watch?v=' + videoId
        linksTab = YoutubeIE()._real_extract(url, allowVP9=allowVP9)
    except Exception:
        printExc()
        if dashSepareteList:
            return [], []
        else:
            return []
    reNum = re.compile('([0-9]+)')
    retHLSList = []
    retList = []
    dashList = []
    # filter dash
    dashAudioLists = []
    dashVideoLists = []
    if dash:
        # separate audio-only and video-only links; expand manifest links
        for item in linksTab:
            if 'mp4a' == item['ext']:
                dashAudioLists.append(item)
            elif item['ext'] in ('mp4v', 'webmv'):
                dashVideoLists.append(item)
            elif 'mpd' == item['ext']:
                tmpList = getMPDLinksWithMeta(item['url'], checkExt=False)
                printDBG(tmpList)
                for idx in range(len(tmpList)):
                    tmpList[idx]['format'] = "%sx%s" % (tmpList[idx].get(
                        'height', 0), tmpList[idx].get('width', 0))
                    tmpList[idx]['ext'] = "mpd"
                    tmpList[idx]['dash'] = True
                dashList.extend(tmpList)

        # sort by quality -> format
        def _key(x):
            # BUGFIX: the original version computed these ints but never
            # returned them, so every key was None and sorted(...,
            # reverse=True) just reversed the list instead of sorting
            # by quality.
            if x['format'].startswith('>'):
                return int(x['format'][1:-1])
            else:
                return int(ph.search(x['format'], reNum)[0])

        dashAudioLists = sorted(dashAudioLists, key=_key, reverse=True)
        dashVideoLists = sorted(dashVideoLists, key=_key, reverse=True)
    for item in linksTab:
        printDBG(">>>>>>>>>>>>>>>>>>>>>")
        printDBG(item)
        printDBG("<<<<<<<<<<<<<<<<<<<<<")
        if -1 < formats.find(item['ext']):
            if 'yes' == item['m3u8']:
                # HLS variant: format string like '720p'
                fmtMatch = re.search('([0-9]+?)p$', item['format'])
                if fmtMatch != None:
                    item['format'] = fmtMatch.group(1) + "x"
                    item['ext'] = item['ext'] + "_M3U8"
                    item['url'] = decorateUrl(item['url'],
                                              {"iptv_proto": "m3u8"})
                    retHLSList.append(item)
            else:
                # progressive variant: format string like '1280x720'
                fmtMatch = re.search('([0-9]+?x[0-9]+?$)', item['format'])
                if fmtMatch != None:
                    item['format'] = fmtMatch.group(1)
                    item['url'] = decorateUrl(item['url'])
                    retList.append(item)
    if len(dashAudioLists):
        # use best audio: merge the top audio track into every video track
        for item in dashVideoLists:
            item = dict(item)
            item["url"] = decorateUrl(
                "merge://audio_url|video_url", {
                    'audio_url': dashAudioLists[0]['url'],
                    'video_url': item['url']
                })
            dashList.append(item)
    # try to get hls format with alternative method (mobile UA page)
    if 0 == len(retList):
        try:
            video_id = YoutubeIE()._extract_id(url)
            url = 'http://www.youtube.com/watch?v=%s&gl=US&hl=en&has_verified=1' % video_id
            sts, data = self.cm.getPage(
                url, {
                    'header': {
                        'User-agent':
                        'Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10'
                    }
                })
            if sts:
                data = data.replace('\\"', '"').replace('\\\\\\/', '/')
                hlsUrl = self.cm.ph.getSearchGroups(
                    data, '''"hlsvp"\s*:\s*"(https?://[^"]+?)"''')[0]
                hlsUrl = json_loads('"%s"' % hlsUrl)
                if self.cm.isValidUrl(hlsUrl):
                    hlsList = getDirectM3U8Playlist(hlsUrl)
                    if len(hlsList):
                        dashList = []
                        for item in hlsList:
                            # NOTE(review): 'with'/'heigth' look like typos
                            # for 'width'/'height' but may match the keys
                            # getDirectM3U8Playlist actually emits — verify
                            # against that helper before renaming.
                            item['format'] = "%sx%s" % (item.get(
                                'with', 0), item.get('heigth', 0))
                            item['ext'] = "m3u8"
                            item['m3u8'] = True
                            retList.append(item)
        except Exception:
            printExc()
    if 0 == len(retList):
        retList = retHLSList
    if dash:
        # NOTE(review): a successful fetch here replaces any dashList built
        # above (including merged audio+video entries) — presumably the
        # fresh dashmpd manifest is preferred; confirm intent.
        try:
            sts, data = self.cm.getPage(
                url, {
                    'header': {
                        'User-agent':
                        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
                    }
                })
            data = data.replace('\\"', '"').replace('\\\\\\/',
                                                    '/').replace('\\/', '/')
            dashUrl = self.cm.ph.getSearchGroups(
                data, '''"dashmpd"\s*:\s*"(https?://[^"]+?)"''')[0]
            dashUrl = json_loads('"%s"' % dashUrl)
            if '?' not in dashUrl:
                dashUrl += '?mpd_version=5'
            else:
                dashUrl += '&mpd_version=5'
            printDBG("DASH URL >> [%s]" % dashUrl)
            if self.cm.isValidUrl(dashUrl):
                dashList = getMPDLinksWithMeta(dashUrl, checkExt=False)
                printDBG(dashList)
                for idx in range(len(dashList)):
                    dashList[idx]['format'] = "%sx%s" % (
                        dashList[idx].get('height', 0),
                        dashList[idx].get('width', 0))
                    dashList[idx]['ext'] = "mpd"
                    dashList[idx]['dash'] = True
        except Exception:
            printExc()
    for idx in range(len(retList)):
        if retList[idx].get('m3u8', False):
            # start live HLS playback 30 segments back from the live edge
            retList[idx]['url'] = strwithmeta(
                retList[idx]['url'], {'iptv_m3u8_live_start_index': -30})
    if dashSepareteList:
        return retList, dashList
    else:
        retList.extend(dashList)
        return retList
def isVP9Allowed():
    """Return the ytVP9 setting, downgraded to False when DASH playback
    is not possible at all (VP9 needs the DASH pipeline)."""
    vp9Setting = config.plugins.iptvplayer.ytVP9.value
    printDBG("1. ALLOW VP9: >> %s" % vp9Setting)
    if not YouTubeParser.isDashAllowed():
        result = False
    else:
        result = vp9Setting
    printDBG("2. ALLOW Dash: >> %s" % result)
    return result