def getTeamCastList(self, cItem):
    """Add TeamCast categories, or the channels of one cached category.

    When ``cItem['url']`` is empty the portal page is downloaded,
    ``self.teamCastTab`` is rebuilt (category name -> list of
    ``(href, label)`` regex matches) and one directory is added for every
    category that has at least one channel.  When the url names a cached
    category, its channels are handed to ``self.playVideo``.  Any other url
    only results in a ``printExc()`` call.
    """
    printDBG('getTeamCastList start')
    #http://team-cast.pl.cp-21.webhostbox.net/kanalyFlash/
    #http://team-cast.pl.cp-21.webhostbox.net/
    #src="http://team-cast.pl.cp-21.webhostbox.net/kanalyFlash/film/hbo.html"
    selected = cItem['url']

    if '' != selected:
        if selected in self.teamCastTab:
            # List channels
            for channel in self.teamCastTab[selected]:
                entry = dict(cItem)
                entry.update({'url':channel[0], 'title':channel[1]})
                self.playVideo(entry)
        else:
            printExc()
        return

    # list categories
    self.teamCastTab = {}
    sts, page = self.cm.getPage('http://team-cast.pl.cp-21.webhostbox.net/')
    if not sts:
        return
    page = CParsingHelper.getDataBeetwenMarkers(page, '<div id="stream-frame">', '<div id="now-watching">', False)[1]
    # remove commented channels
    page = re.sub('<!--[^!]+?-->', '', page)
    # the text in front of the first menu entry is not a category
    for section in page.split('<li class="menu_right">')[1:]:
        catName = CParsingHelper.getDataBeetwenMarkers(section, '<a href="#" class="drop">', '</a>', False)[1].strip()
        channels = re.findall('<a href="([^"]+?)">([^<]+?)<img src="http://wrzucaj.net/images/2014/09/12/flash-player-icon.png"', section)
        if not len(channels):
            continue
        self.teamCastTab[catName] = channels
        entry = dict(cItem)
        entry.update({'url':catName, 'title':catName + ' (%d)' % len(channels)})
        self.addDir(entry)
def getWebCamera(self, cItem):
    """List web-camera categories or the cameras of one category.

    The page at ``cItem['url']`` is downloaded.  For the item titled
    ``'WebCamera PL'`` a 'Polecane kamery' directory plus one directory per
    category link is added; for every other item the cameras found on the
    page are passed to ``self.playVideo``.  Nothing is added when the
    download fails.
    """
    printDBG("getWebCamera start")
    sts, page = self.cm.getPage(cItem['url'])
    if not sts:
        return

    if cItem['title'] != 'WebCamera PL':
        # cameras of a single category
        listing = CParsingHelper.getDataBeetwenMarkers(page, '<div class="inlinecam', '<div id="footerbar">', False)[1]
        for chunk in listing.split('<div class="inlinecam'):
            chunk = CParsingHelper.getDataBeetwenMarkers(chunk, '<a', '</div>', True)[1]
            camUrl = self.cm.ph.getSearchGroups(chunk, """href=['"](http[^'^"]+?)['"]""")[0]
            if '' == camUrl:
                continue
            camTitle = self._cleanHtmlStr(CParsingHelper.getDataBeetwenMarkers(chunk, '<div class="bar">', '</div>', False)[1])
            camIcon = self.cm.ph.getSearchGroups(chunk, """data-src=['"](http[^'^"]+?)['"]""")[0]
            entry = dict(cItem)
            entry.update({'title':camTitle, 'url':camUrl, 'icon':camIcon})
            self.playVideo(entry)
        return

    # main page: recommended cameras entry followed by the categories
    entry = dict(cItem)
    entry.update({'title':'Polecane kamery'})
    self.addDir(entry)

    listing = CParsingHelper.getDataBeetwenMarkers(page, '<h4>Kamery wg kategorii</h4>', '</div>', False)[1]
    # whatever follows the last closing anchor is not a category link
    for chunk in listing.split('</a>')[:-1]:
        catUrl = self.cm.ph.getSearchGroups(chunk, """href=['"](http[^'^"]+?)['"]""")[0]
        if '' == catUrl:
            continue
        entry = dict(cItem)
        entry.update({'title':self._cleanHtmlStr(chunk), 'url':catUrl})
        self.addDir(entry)
def getLinksForVideo(self, cItem):
    """Return the hosting link embedded in a movie page.

    The embed block of the page at ``cItem['url']`` is searched for a
    ``data-rocketsrc`` attribute, then an ``<iframe>`` source, then a
    ``<script>`` source.  A videomega ``validatehash.php`` link is
    exchanged for the matching ``view.php`` player url.

    Returns a list with at most one ``{'name', 'url', 'need_resolve'}``
    dict; the list is empty when a download fails or no link is found.
    """
    printDBG("MoviesHDCO.getLinksForVideo [%s]" % cItem)
    links = []

    sts, page = self.getPage(cItem['url'])
    if not sts:
        return links
    #printDBG(data)
    embed = CParsingHelper.getDataBeetwenMarkers(page, '<div class="video-embed">', '</div>', False)[1]

    link = CParsingHelper.getDataBeetwenMarkers(embed, 'data-rocketsrc="', '"', False)[1]
    # fall back to the iframe source and then to the script source
    for pattern in ('<iframe[^>]+?src="([^"]+?)"', '<script[^>]+?src="([^"]+?)"'):
        if link != '':
            break
        link = self.cm.ph.getSearchGroups(embed, pattern)[0]

    if link.startswith('//'):
        link = 'http:' + link
    link = self._getFullUrl(link)

    if 'videomega.tv/validatehash.php?' in link:
        requestParams = {'header':{'Referer':cItem['url'], 'User-Agent':'Mozilla/5.0'}}
        sts, page = self.cm.getPage(link, requestParams)
        if not sts:
            return links
        ref = self.cm.ph.getSearchGroups(page, 'ref="([^"]+?)"')[0]
        if '' == ref:
            return links
        link = 'http://videomega.tv/view.php?ref={0}&width=700&height=460&val=1'.format(ref)

    if '' != link:
        links.append({'name':self.up.getHostName(link), 'url':link, 'need_resolve':1})
    return links
def fillFilters(self, refresh=False):
    """Download the main page and fill ``self.filters`` from it.

    Three filter groups are parsed: ``'sort'``, ``'ver'`` and ``'cat'``
    (the latter always starts with the 'Wszystkie' entry).  They are stored,
    and ``self.filtersFilled`` is set, only when all three groups were
    found.

    Returns ``False`` when the filters are already filled and ``refresh``
    is not requested; returns ``None`` in every other case.
    """
    printDBG('getFilters')

    def appendFilters(pairs, target):
        # each pair is (value, label) as captured by the regexes below
        printDBG("---------------------")
        for pair in pairs:
            target.append({'tab': pair[1], 'val': pair[0]})
            printDBG("filter: %r" % target[-1])

    if self.filtersFilled and not refresh:
        return False

    sts, page = self.cm.getPage(self.MAINURL)
    if sts == False:
        return

    sortHtml = CParsingHelper.getDataBeetwenMarkers(page, 'Sortuj:', '</div>', False)[1]
    verHtml = CParsingHelper.getDataBeetwenMarkers(page, 'Wyświetl:', '</div>', False)[1]
    catHtml = CParsingHelper.getDataBeetwenMarkers(page, 'Kategorie Filmowe', '<script>', False)[1]
    page = '' # free data ;)

    sortPairs = re.compile('href="[^,]+?\,([^,]+?)\,wszystkie,0\.html">([^<]+?)<').findall(sortHtml)
    verPairs = re.compile('href="[^,]+?\,[^,]+?\,([^,]+?),0\.html">([^<]+?)<').findall(verHtml)
    catPairs = re.compile('href="([^,]+?\,[^.]+?)\.html">([^<]+?)<').findall(catHtml)

    # keep the previous filters unless every group was parsed successfully
    if not (0 < len(sortPairs) and 0 < len(verPairs) and 0 < len(catPairs)):
        return

    self.filters['sort'] = []
    self.filters['ver'] = []
    self.filters['cat'] = [{'tab': 'Wszystkie', 'val': 'glowna'}]
    appendFilters(sortPairs, self.filters['sort'])
    appendFilters(verPairs, self.filters['ver'])
    appendFilters(catPairs, self.filters['cat'])
    self.filtersFilled = True
def getVideosList(self, url):
    """Append the videos of one listing page to ``self.currList``.

    The page at ``url`` is downloaded; every ``<li>`` of the video list that
    carries a play icon and a title link becomes a ``'video'`` entry.  When
    the pager contains a ``nextPage`` (or, failing that, ``lastPage``) link
    a 'Następna strona' category entry is appended as well.

    Returns ``None``; nothing is appended when the download fails.
    """
    printDBG("getVideosList url[%s]" % url)
    sts, data = self.cm.getPage(url)
    if not sts:
        printDBG("getVideosList except")
        return

    # get pagination HTML part
    nextPageData = CParsingHelper.getDataBeetwenMarkers(data, 'class="pager"', '</div>', False)[1]
    # get Video HTML part
    data = CParsingHelper.getDataBeetwenMarkers(data, '<!-- ************ end user menu ************ -->', '</ul>', False)[1].split('<li>')
    del data[0]

    for videoItemData in data:
        printDBG(' videoItemData')
        if 'class="playIcon"' not in videoItemData:
            continue

        icon = ''
        duration = ''
        gatunek = ''
        plot = ''

        # get icon src; the attribute value is HTML-escaped, so decode '&amp;'
        match = re.search('src="(http://[^"]+?)"', videoItemData)
        if match:
            icon = match.group(1).replace('&amp;', '&')
        # get duration
        # NOTE(review): the entity literal was lost in this copy of the file;
        # '&#039;' (apostrophe) is assumed here - confirm against the site.
        match = re.search('class="duration"[^>]*?>([^<]+?)<', videoItemData)
        if match:
            duration = match.group(1).replace('&#039;', "'")
        # get gatunek
        match = re.search('"gatunek"[^>]*?>([^<]+?)<', videoItemData)
        if match:
            gatunek = match.group(1)
        # get plot
        match = re.search('class="text"[^>]*?>([^<]+?)<', videoItemData)
        if match:
            plot = match.group(1)
        # get title and url; an item without them is skipped
        match = re.search('<a href="([^"]+?)" class="title"[^>]*?>([^<]+?)</a>', videoItemData)
        if match:
            videoUrl = self.MAIN_URL + match.group(1)
            title = match.group(2)
            params = {'type': 'video', 'page': videoUrl, 'title': title, 'icon': icon, 'duration': duration, 'gatunek': gatunek, 'plot': plot}
            self.currList.append( params )

    # check next page
    nextPageUrl = ''
    match = re.search('href="([^"]+?)" class="nextPage"', nextPageData)
    if not match:
        match = re.search('href="([^"]+?)" class="lastPage"', nextPageData)
    if match:
        nextPageUrl = match.group(1)

    if '' != nextPageUrl:
        params = {'type': 'category', 'name': 'sub-category', 'page': self.MAIN_URL + nextPageUrl.replace('&amp;', '&'), 'title': 'Następna strona'}
        self.currList.append( params )
def listSerialSeasons(self, category, url, icon):
    """Add one directory per season found on a serial's page.

    ``url`` is the serial page, ``icon`` is copied into every entry and
    ``category`` is stored as the entries' ``'category'``.  The serial
    description from the page is used as ``'plot'``.  Nothing is added when
    the download fails.
    """
    printDBG("listSerialSeasons")
    sts, data = self.cm.getPage( url )
    if False == sts:
        return
    plot = CParsingHelper.getDataBeetwenMarkers(data, '<p class="serialDescription">', '</p>', False)[1]
    data = CParsingHelper.getDataBeetwenMarkers(data, '<div class="seasonExpand">', '<script>', False)[1]
    seasons = re.compile('<a href="[/]?(serial,[^,]+?,sezon,[1-9][0-9]*?.html)">([^<]+?)</a>').findall(data)
    for href, title in seasons:
        # Resolve every link on its own so an absolute href can never fall
        # back to the page url or to the previous season's url.
        if href.startswith('http'):
            seasonUrl = href
        else:
            seasonUrl = self.MAINURL + href
        params = {'name': 'category', 'title': title, 'category': category, 'url': seasonUrl, 'icon':icon, 'plot':plot}
        self.addDir(params)
def listSearchResults(self, pattern, searchType):
    """Run a site search and list the film or the serial results.

    ``searchType`` equal to ``'filmy'`` selects the films section and lists
    it as ``'video'`` items; any other value selects the serials section
    and lists it under the ``'Serial_seasons_list'`` category.  ``pattern``
    is inserted into the query string as given.
    """
    printDBG("listFilms pattern[%s], searchType[%s]" % (pattern, searchType))
    searchUrl = self.MAINURL + 'szukaj.html?query=%s&mID=' % pattern
    sts, page = self.cm.getPage( searchUrl )
    if sts == False:
        return

    if 'filmy' == searchType:
        sectionStart = '<h2 id="movies-res">Filmy:'
        category = 'video'
    else:
        sectionStart = '<h2 id="serials-res">Seriale:'
        category = 'Serial_seasons_list'

    # the found/not-found flag of the marker search is not inspected
    section = CParsingHelper.getDataBeetwenMarkers(page, sectionStart, '<a href="#top"', False)[1]
    self.listItems(section.split('<li data-url='), category)
def getArticleContent(self, cItem):
    """Return title and description of the article at ``cItem['url']``.

    Returns an empty list when ``cItem`` has no ``'url'`` or the download
    fails, otherwise a one-element list holding a dict with ``'title'``,
    ``'text'`` and an (always empty) ``'images'`` list.
    """
    printDBG("Filmy3dEU.getArticleContent [%s]" % cItem)
    if 'url' not in cItem:
        return []

    sts, page = self.cm.getPage(cItem['url'])
    if not sts:
        return []

    content = CParsingHelper.getDataBeetwenMarkers(page, "<div id='dle-content'>", '<div class="gf-right">', False)[1]
    title = CParsingHelper.getDataBeetwenMarkers(content, '<h1 class="title">', '</h1>', False)[1]
    # NOTE(review): the icon is looked up but never used - 'images' below is
    # always empty, and the 'srct' attribute name looks unusual; confirm.
    icon = self.cm.ph.getSearchGroups(content, 'srct="([^"]+?)"')[0]
    commentHtml = self.cm.ph.getDataBeetwenMarkers(content, '<div class="comment-box-block" id="comment1">', '</div>', False)[1]
    desc = self.cleanHtmlStr( commentHtml )

    return [{'title':title, 'text':desc, 'images':[]}]
def getF4MLinksWithMeta(manifestUrl, checkExt=True):
    """Build playlist entries for an f4m (HDS) manifest.

    :param manifestUrl: manifest url, possibly carrying meta data.
    :param checkExt: when true, urls whose path (the part before ``?``)
        does not end with ``.f4m`` are rejected.
    :return: list of ``{'name', 'url'}`` dicts, one per ``bitrate``
        attribute found in the manifest, or a single generic entry when no
        bitrate is listed.  Every url is a ``strwithmeta`` tagged with
        ``iptv_proto`` (and ``iptv_bitrate`` / ``iptv_livestream`` where
        applicable).  The list is empty when the extension check or the
        download fails.
    """
    if checkExt and not manifestUrl.split('?')[0].endswith('.f4m'):
        return []

    cm = common()
    headerParams, postData = cm.getParamsFromUrlWithMeta(manifestUrl)

    retPlaylists = []
    sts, data = cm.getPage(manifestUrl, headerParams, postData)
    if sts:
        # The manifest text has to be passed to the helper and the text part
        # of its (status, text) result compared; without both the live
        # stream flag could never be set.
        streamType = CParsingHelper.getDataBeetwenMarkers(data, '<streamType>', '</streamType>', False)[1]
        liveStreamDetected = 'live' == streamType.strip()

        bitrates = re.compile('bitrate="([0-9]+?)"').findall(data)
        for item in bitrates:
            link = strwithmeta(manifestUrl, {'iptv_proto':'f4m', 'iptv_bitrate':item})
            if liveStreamDetected:
                link.meta['iptv_livestream'] = True
            retPlaylists.append({'name':'[f4m/hds] bitrate[%s]' % item, 'url':link})

        if 0 == len(retPlaylists):
            # no bitrate information: offer the manifest as a single stream
            link = strwithmeta(manifestUrl, {'iptv_proto':'f4m'})
            if liveStreamDetected:
                link.meta['iptv_livestream'] = True
            retPlaylists.append({'name':'[f4m/hds]', 'url':link})
    return retPlaylists
def getF4MLinksWithMeta(manifestUrl, checkExt=True):
    """Build playlist entries for an f4m (HDS) manifest.

    :param manifestUrl: manifest url, possibly carrying meta data.
    :param checkExt: when true, urls whose path (the part before ``?``)
        does not end with ``.f4m`` are rejected.
    :return: list of ``{"name", "url"}`` dicts, one per ``bitrate``
        attribute found in the manifest, or a single generic entry when no
        bitrate is listed.  Every url is a ``strwithmeta`` tagged with
        ``iptv_proto`` (and ``iptv_bitrate`` / ``iptv_livestream`` where
        applicable).  The list is empty when the extension check or the
        download fails.
    """
    if checkExt and not manifestUrl.split("?")[0].endswith(".f4m"):
        return []

    cm = common()
    headerParams, postData = cm.getParamsFromUrlWithMeta(manifestUrl)

    retPlaylists = []
    sts, data = cm.getPage(manifestUrl, headerParams, postData)
    if sts:
        # The manifest text has to be passed to the helper and the text part
        # of its (status, text) result compared; without both the live
        # stream flag could never be set.
        streamType = CParsingHelper.getDataBeetwenMarkers(data, "<streamType>", "</streamType>", False)[1]
        liveStreamDetected = "live" == streamType.strip()

        bitrates = re.compile('bitrate="([0-9]+?)"').findall(data)
        for item in bitrates:
            link = strwithmeta(manifestUrl, {"iptv_proto": "f4m", "iptv_bitrate": item})
            if liveStreamDetected:
                link.meta["iptv_livestream"] = True
            retPlaylists.append({"name": "[f4m/hds] bitrate[%s]" % item, "url": link})

        if 0 == len(retPlaylists):
            # no bitrate information: offer the manifest as a single stream
            link = strwithmeta(manifestUrl, {"iptv_proto": "f4m"})
            if liveStreamDetected:
                link.meta["iptv_livestream"] = True
            retPlaylists.append({"name": "[f4m/hds]", "url": link})
    return retPlaylists
def getHostingTable(self, urlItem):
    """Return the hosting links for ``urlItem``, using a one-entry cache.

    The cache (``self.linksCacheCache``) is keyed by the item's url plus its
    optional ``'ver'`` value and is only used when it holds a non-empty
    table.  Otherwise the language versions are read from the page (limited
    to the one matching ``'ver'`` when present) and links are collected for
    each: premium links first when logged in, free links when the premium
    lookup returned nothing.
    """
    printDBG("getHostingTable url[%s]" % urlItem['url'])

    # use cache if possible
    cache = self.linksCacheCache
    if 0 < len( cache.get('tab', []) ) and (urlItem['url'] + urlItem.get('ver', '')) == cache.get('marker', None):
        return cache['tab']

    hostingTab = []
    sts, page = self.cm.getPage( urlItem['url'] )
    if sts == False:
        return hostingTab

    # get lang tab
    langsHtml = CParsingHelper.getDataBeetwenMarkers(page, '<div class="langs">', '</div>', False)[1]
    langTab = []
    for found in re.compile('data-id="([^"]+?)"[^>]*?>(.+?)</a>', re.DOTALL).findall(langsHtml):
        entry = {'val': found[0], 'title': self.cleanHtmlStr(found[1])}
        if entry['val'] == urlItem.get('ver', ''):
            # the requested version was found: use it exclusively
            langTab = [entry]
            break
        langTab.append( entry )

    for lang in langTab:
        links = []
        if self.loggedIn:
            links = self.getLinks(urlItem['url'], lang, {'val': 'premium', 'title':'Premium'})
        if 0 == len(links):
            links = self.getLinks(urlItem['url'], lang, {'val': 'free', 'title':'Free'})
        hostingTab.extend(links)

    self.linksCacheCache = {'marker': urlItem['url'] + urlItem.get('ver', ''), 'tab': hostingTab}
    return hostingTab
def getArticleContent(self, cItem):
    """Return the IMDb-style details of the movie at ``cItem['url']``.

    The ``imdbinfo`` table of the page is parsed: its first row provides the
    title (falling back to the poster's ``alt`` text when the row is empty),
    the second row provides the poster, the plot and the optional fields
    collected in ``'other_info'``.

    Returns an empty list when ``cItem`` has no ``'url'``, the download
    fails, the table is missing or it has fewer than two rows; otherwise a
    one-element list with ``'title'``, ``'text'``, ``'images'`` and
    ``'other_info'``.
    """
    printDBG("MoviesHDCO.getArticleContent [%s]" % cItem)
    retTab = []

    if 'url' not in cItem:
        return []

    sts, data = self.getPage(cItem['url'])
    if not sts:
        return retTab

    sts, data = CParsingHelper.getDataBeetwenMarkers(data, '<table id="imdbinfo">', '</table>', False)
    if not sts:
        return retTab
    tmp = data.split('</tr>')
    if len(tmp) < 2:
        return retTab

    title = self.cleanHtmlStr(tmp[0])
    if '' == title:
        # the fallback has to land in the title, not in the icon that is
        # unconditionally assigned right below
        title = self.cm.ph.getSearchGroups(tmp[1], 'alt="([^"]+?)"')[0]
    icon = self.cm.ph.getSearchGroups(tmp[1], 'src="([^"]+?)"')[0]
    desc = self.cm.ph.getDataBeetwenMarkers(tmp[1], '<b>Plot:</b>', '</td>', False)[1]

    otherInfo = {}
    tmpTab = [{'mark':'<b>Rating:</b>', 'key':'rating'},
              {'mark':'<b>Director:</b>', 'key':'director'},
              {'mark':'<b>Writer:</b>', 'key':'writer'},
              {'mark':'<b>Stars:</b>', 'key':'stars'},
              {'mark':'<b>Runtime:</b>', 'key':'duration'},
              {'mark':'<b>Rated:</b>', 'key':'rated'},
              {'mark':'<b>Genre:</b>', 'key':'genre'},
              {'mark':'<b>Released:</b>', 'key':'released'},
             ]
    for item in tmpTab:
        val = self.cm.ph.getDataBeetwenMarkers(tmp[1], item['mark'], '</td>', False)[1]
        if '' != val:
            otherInfo[item['key']] = self.cleanHtmlStr(val)

    return [{'title':self.cleanHtmlStr( title ), 'text': self.cleanHtmlStr( desc ), 'images':[{'title':'', 'url':self._getFullUrl(icon)}], 'other_info':otherInfo}]
def getLink(self, url):
    """Resolve a ``pageUrl|playerId|playerType`` triple to a video link.

    A ``url`` that does not consist of exactly three ``|``-separated parts
    is returned unchanged.  Otherwise the player is requested from the page
    with an AJAX post (cookies are sent for every player type but
    ``'free'``): free players are followed through two nested iframes and
    resolved with ``self.up.getVideoLink``, other players yield the
    ``url:`` value of the player markup.  An empty string is returned when
    a download fails.
    """
    printDBG("getLink url[%s]" % url)
    parts = url.split('|')
    if 3 != len(parts):
        return url

    pageUrl, playerId, playerType = parts
    isFree = 'free' == playerType

    headers = dict(self.AJAX_HEADER)
    headers['Referer'] = pageUrl
    httpParams = {'header': headers}
    if not isFree:
        httpParams.update({'use_cookie': True, 'save_cookie': False, 'load_cookie': True, 'cookiefile': self.COOKIE_FILE})
    postData = { 'action': 'getPlayer', 'id': playerId, 'playerType': playerType }

    sts, page = self.cm.getPage( pageUrl, httpParams, postData)
    if not sts:
        return ''
    player = CParsingHelper.getDataBeetwenMarkers(page, '<div class="player">', '<div class="playerTypes">', False)[1]

    if not isFree:
        return CParsingHelper.getSearchGroups(player, 'url: [\'"](http[^\'"]+?)[\'"]')[0]

    # free player: the hosting url sits inside a second, nested iframe
    frameUrl = CParsingHelper.getSearchGroups(player, '<iframe [^>]*?src="([^"]+?)"')[0]
    sts, framePage = self.cm.getPage( frameUrl )
    if not sts:
        return ''
    hostingUrl = CParsingHelper.getSearchGroups(framePage, '<iframe [^>]*?src="([^"]+?)"')[0]
    return self.up.getVideoLink( hostingUrl )
def getTop100(self, baseUrl, mode):
    """Add the entries of the top-100 ranking as videos.

    ``mode`` is posted as the ``'kategoria'`` field to ``baseUrl``.  Every
    ranking block with an English and a Polish title link becomes a video
    whose title is prefixed with its running position; blocks without such
    links are skipped and do not consume a position.
    """
    printDBG("getTop100 for url[%s]" % baseUrl)
    sts, page = self.cm.getPage( baseUrl, {}, { 'kategoria' : mode })
    if sts == False:
        printDBG("getTop100 problem")
        return

    sts, page = CParsingHelper.getDataBeetwenMarkers(page, '<div class="ew-top-100"', '</section></div>', True)
    if sts == False:
        printDBG("getTop100 problem no data beetween markers")
        return

    blocks = page.split('<div class="ew-top-100"')
    if len(blocks) > 1:
        del blocks[0]

    position = 1
    for block in blocks:
        # url & title
        found = re.search('<a href="([^"]+?)" class="en">([^<]+?)</a>[^<]*?<a href="[^"]+?" class="pl">([^<]*?)</a>', block)
        if not found:
            continue
        url = self.MAINURL + found.group(1)
        title = str(position) + '. ' + found.group(2) + ' / ' + found.group(3)
        position = position + 1

        # img
        found = re.search('<img src="([^"]+?)"', block)
        img = found.group(1) if found else ''

        # plot
        found = re.search('<p[^>]*?>([^<]+?)</p>', block)
        plot = found.group(1).strip() if found else ''

        self.addVideo({ 'title': title, 'url': url, 'icon': img, 'plot': plot})
def listItems(self, data, itemType, nextPageItem=None, getPlot=None, setRating=True, ver=''):
    """Turn HTML fragments into video or directory entries.

    :param data: iterable of HTML fragments, one per item.
    :param itemType: ``'video'`` adds videos, anything else adds
        directories with this value as their ``'category'``.
    :param nextPageItem: optional directory entry added after all items.
    :param getPlot: optional callable extracting the raw plot from a
        fragment; by default the ``description`` div is used.
    :param setRating: when true, the plot is prefixed with the rating
        computed from the fragment's rating bar width.
    :param ver: stored as ``'ver'`` in video entries.

    Fragments without a link or a title are skipped.
    """
    for fragment in data:
        icon = CParsingHelper.getSearchGroups(fragment, 'src="([^"]+?jpg)"')[0]
        url = CParsingHelper.getSearchGroups(fragment, 'href="([^"]+?)"')[0]
        title = CParsingHelper.getSearchGroups(fragment, 'title="([^"]+?)"')[0]
        if '' == title:
            title = CParsingHelper.getSearchGroups(fragment, '<div class="title">([^<]+?)</div>')[0]

        ratingPrefix = ''
        if setRating:
            percent = CParsingHelper.getSearchGroups(fragment, '<div class="rating" style="width:([0-9]+?)\%">')[0]
            if '' == percent:
                percent = '0'
            ratingPrefix = 'Ocena: %s | ' % (str(int(percent)/10) + '/10')

        if getPlot is None:
            rawPlot = CParsingHelper.getDataBeetwenMarkers(fragment, '<div class="description">', '<a', False)[1]
        else:
            rawPlot = getPlot(fragment)
        plot = self.cleanHtmlStr(rawPlot)

        # validate data
        if '' == url or '' == title:
            continue
        if not url.startswith('http'):
            url = self.MAINURL + url
        if len(icon) and not icon.startswith('http'):
            icon = self.MAINURL + icon

        if 'video' == itemType:
            self.addVideo({'title':title, 'url':url, 'icon':icon, 'plot': ratingPrefix + plot, 'ver': ver})
        else:
            self.addDir({'name': 'category', 'title':title, 'category': itemType, 'url':url, 'icon':icon, 'plot': ratingPrefix + plot})

    if nextPageItem is not None:
        self.addDir(nextPageItem)
def getVideosFromChannelList(self, url, category, page, cItem):
    """Return the videos of one page of a channel listing.

    :param url: page (or "load more") url to download.
    :param category: not used by this method.
    :param page: page number as a string; ``'1'`` is parsed as a regular
        HTML page, later pages are treated as escaped "load more" content.
    :param cItem: template copied for the "Next page" entry.
    :return: list produced by ``self.parseListBase`` plus, when a
        "load more" link exists, a next-page entry.  An empty list is
        returned when the download fails or any error occurs (the error is
        reported with ``printExc``).
    """
    printDBG('YouTubeParser.getVideosFromChannelList page[%s]' % (page) )
    currList = []
    try:
        sts,data = self.cm.getPage(url, {'host': self.HOST})
        if sts:
            if '1' == page:
                sts,data = CParsingHelper.getDataBeetwenMarkers(data, 'feed-item-container', 'footer-container', False)
            else:
                data = unescapeHTML(data.decode('unicode-escape')).encode('utf-8').replace('\/', '/')

            # nextPage; the attribute value is HTML-escaped, so '&amp;' has
            # to be decoded before the link can be requested
            match = re.search('data-uix-load-more-href="([^"]+?)"', data)
            if not match:
                nextPage = ""
            else:
                nextPage = match.group(1).replace('&amp;', '&')

            data = data.split('feed-item-container')
            currList = self.parseListBase(data)

            if '' != nextPage:
                item = dict(cItem)
                item.update({'title': _("Next page"), 'page': str(int(page) + 1), 'url': 'http://www.youtube.com' + nextPage})
                currList.append(item)
    except Exception:
        # best effort: report the problem and return no items, but let
        # KeyboardInterrupt / SystemExit propagate
        printExc()
        return []
    return currList
def getLastAdded(self, baseUrl, cat, sub_cat, mode, page):
    """List one page of the recently added items.

    :param baseUrl: listing url; ``?page=N`` is appended for pages other
        than ``'1'``.
    :param cat: category stored in the next-page entry.
    :param sub_cat: when empty the items are added as videos, otherwise as
        directories of this category.
    :param mode: posted as ``'mode'`` and stored in the next-page entry.
    :param page: page number as a string.

    A 'Następna strona' directory is added when the page contains a
    ``rel="next"`` link.
    """
    printDBG("getLastAdded for url[%s] page[%s]" % (baseUrl, page) )
    HTTP_HEADER= { 'Host':'alekino.tv',
                   'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:21.0) Gecko/20100101 Firefox/21.0',
                   'Referer':self.MAINURL + '/',
                   'X-Requested-With':'XMLHttpRequest' }

    # the first page is requested without a query and posts page '0'
    firstPage = page == '1'
    strPage = '' if firstPage else '?page=' + page
    postPage = '0' if firstPage else page
    post_data = { 'dostep' : 'true', 'mode':mode, 'days':'0', 'page':postPage }

    sts, html = self.cm.getPage( baseUrl + strPage, {'header':HTTP_HEADER}, post_data)
    if sts == False:
        printDBG("getLastAdded problem")
        return

    # next page?
    nextPage = -1 != html.find('rel="next"')

    sts, html = CParsingHelper.getDataBeetwenMarkers(html, '<div style="padding-bottom:0px;">', '<div class="pagination-recent">', True)
    if sts == False:
        printDBG("getLastAdded problem no data beetween markers")
        return

    blocks = html.split('<div style="padding-bottom:0px;">')
    if len(blocks) > 1:
        del blocks[0]

    for block in blocks:
        block = block.replace('<br/>', '')

        # url & title
        found = re.search('<a class="movie-title-hover" href="([^"]+?)"[^>]+?>([^<]+?)</a>', block)
        if not found:
            continue
        url = self.MAINURL + found.group(1)
        title = found.group(2).replace('\n', '').replace('\r', '').strip()

        # img
        found = re.search('<img src="([^"]+?)"', block)
        img = found.group(1) if found else ''

        # plot
        found = re.search('<div class="clearfix"></div>([^<]+?)</div>', block)
        plot = found.group(1) if found else ''

        params = { 'title': title, 'url': url, 'icon': img, 'plot': plot}
        if sub_cat == '':
            self.addVideo(params)
            continue
        params['name']='category'
        params['category']=sub_cat
        self.addDir(params)

    #pagination
    if nextPage:
        self.addDir({'name': 'category', 'category': cat, 'sub_cat':sub_cat, 'title': 'Następna strona', 'url': baseUrl, 'mode':mode, 'page': str(int(page) + 1)})
def listSerialsByLetter(self, category, url):
    """List the serials shown on a letter page.

    The section between the 'Seriale na liter' heading and the right-hand
    column is split on ``</li>`` and passed to ``self.listItems`` with
    ``category`` as the item type.  Nothing is listed when the download
    fails.
    """
    printDBG("listSerialsByLetter")
    sts, page = self.cm.getPage( url )
    if sts == False:
        return
    # the found/not-found flag of the marker search is not inspected
    section = CParsingHelper.getDataBeetwenMarkers(page, 'Seriale na liter', '<div class="right">', False)[1]
    self.listItems(section.split('</li>'), category)
def getMovieTab(self, url):
    """Append the videos of one listing page to ``self.currList``.

    Every ``<li>`` of the ``videos-listing`` list that has both a link and
    an image with ``alt`` text becomes a ``'video'`` entry.  When the
    pagination block holds a next-page link starting with ``http://`` a
    'Następna strona' category entry is appended as well.

    Returns ``None``; nothing is appended when the request raises or the
    listing markers are missing.
    """
    FightTube.printDBG('getMovieTab start')
    query_data = { 'url': url, 'return_data': True }
    try:
        data = self.cm.getURLRequestData(query_data)
    except Exception:
        # best effort: a failed request just yields no items
        FightTube.printDBG('getMovieTab exception')
        return

    # get next page url
    nexPageUrl = ''
    sts, tmp = CParsingHelper.getDataBeetwenMarkers(data, "<nav class='pagination'>", "</nav>", False)
    if sts:
        # NOTE(review): the link text is matched as a literal '>' - confirm
        # the site does not serve it as the '&gt;' entity.
        match = re.search("<li><a href='([^']+?)'>></a></li>", tmp)
        if match:
            nexPageUrl = match.group(1)

    # separate vidTab
    sts, data = CParsingHelper.getDataBeetwenMarkers(data, "<ul class='videos-listing'>", "</ul>", False)
    if not sts:
        # same logger as the rest of the method
        FightTube.printDBG('getMovieTab: main markers cannot be found!')
        return

    # separate videos data
    for vidItem in data.split('</li>'):
        vidUrl = ''
        title = ''
        icon = ''
        match = re.search("<a href='([^']+?)'", vidItem)
        if match:
            vidUrl = match.group(1)
        match = re.search("<img src='([^']+?)' alt='([^']+?)'", vidItem)
        if match:
            icon = match.group(1)
            title = match.group(2)
        if '' != vidUrl and '' != title:
            params = {'type': 'video', 'title': title, 'page': vidUrl, 'icon': icon}
            self.currList.append(params)

    if nexPageUrl.startswith("http://"):
        params = {'type': 'category', 'name': 'nextpage', 'title': 'Następna strona', 'page': nexPageUrl, 'icon': ''}
        self.currList.append(params)
    return
def getMenuHTML(self):
    """Return the navigation-menu HTML, downloading it when needed.

    The menu is fetched from ``self.MAIN_URL`` when ``self.refresh`` equals
    ``True`` or nothing is cached yet in ``self.menuHTML``.  A failed
    download leaves the cached value empty.
    """
    printDBG("getMenuHTML start")
    mustFetch = self.refresh == True or self.menuHTML == ''
    if mustFetch:
        self.menuHTML = ''
        sts, page = self.cm.getPage( self.MAIN_URL )
        if sts:
            self.menuHTML = CParsingHelper.getDataBeetwenMarkers(page, '<div class="nav-collapse collapse">', '<!--/.nav-collapse -->', False)[1]
    return self.menuHTML
def listSerialsLastUpdated(self, category):
    """List the recently updated serials from ``self.SERIALS_URL``.

    The section is split on ``</li>`` and passed to ``self.listItems``
    without ratings; each item's whole fragment serves as its raw plot.
    """
    printDBG("listSerialsLastUpdated")
    sts, page = self.cm.getPage( self.SERIALS_URL )
    if sts == False:
        return
    # the found/not-found flag of the marker search is not inspected
    section = CParsingHelper.getDataBeetwenMarkers(page, 'Ostatnio zaktualizowane seriale', '<div class="right">', False)[1]

    def wholeFragmentAsPlot(fragment):
        return fragment

    self.listItems(section.split('</li>'), category, None, wholeFragmentAsPlot, False)
def getVideoLinks(self, url):
    """Return the mp4 links found in the playlist of the page at ``url``.

    :return: list of ``{'name', 'url'}`` dicts, one per playlist entry; the
        url is rebuilt on top of ``self.MAI_URL``.  The list is empty when
        the download fails or no entry matches, so the result can always be
        iterated.
    """
    printDBG('getVideoLink url[%s]' % url)
    urlItems = []
    sts, data = self.cm.getPage( url )
    if not sts:
        # keep the return type consistent with the success path
        return urlItems
    sts, data = CParsingHelper.getDataBeetwenMarkers(data, 'playlist: [', ']', True)
    data = re.compile("'http://tvproart.pl/tvonline/([^/]+?)/([^']+?\.mp4)'").findall(data)
    for item in data:
        urlItems.append({'name':item[0], 'url': self.MAI_URL + item[0] + '/' + item[1]})
    return urlItems
def listSerialsBack(self, category, marker1, marker2):
    """List serials from the tooltip-based section of ``self.SERIALS_URL``.

    The section between ``marker1`` and ``marker2`` is unescaped
    (``\\'`` becomes ``"`` and remaining backslashes are removed), split
    on the tooltip anchor and passed to ``self.listItems``; the plot is
    taken from the tooltip's table cell.
    """
    sts, page = self.cm.getPage( self.SERIALS_URL )
    if not sts:
        return

    section = CParsingHelper.getDataBeetwenMarkers(page, marker1, marker2, False)[1]
    section = section.replace("\\'", '"').replace('\\', '')

    def plotFromTooltip(fragment):
        return CParsingHelper.getDataBeetwenMarkers(fragment, 'width="100"></td><td>', '<div', False)[1]

    self.listItems(section.split('<a onmouseover="toolTip('), category, None, plotFromTooltip)
def listsCategoriesMenu(self, url):
    """Add one directory per entry of the category selection list.

    The page at ``url`` is requested with ``self.HEADER``; every
    ``</li>``-separated fragment of the selection list becomes a directory
    whose title is the cleaned fragment text and whose category is the
    fragment's ``data-value``.
    """
    sts, page = self.cm.getPage( url, {'header': self.HEADER } )
    if not sts:
        return
    selection = CParsingHelper.getDataBeetwenMarkers(page, 'movie-kat-selection">', '</ul>', False)[1]
    for fragment in selection.split('</li>'):
        label = self.cleanHtmlStr(fragment)
        catID = CParsingHelper.getSearchGroups(fragment, 'data-value="([^"]+?)"', 1)[0]
        self.addDir({'name': 'category', 'title': label, 'category': catID})
def listsMainMenu(self):
    """Add one directory per ``menuitemtv`` link of the main page.

    Each entry's url is the link target appended to ``self.MAI_URL``.
    Nothing is added when the download fails.
    """
    printDBG("listsMainMenu")
    sts, page = self.cm.getPage( self.MAI_URL )
    if not sts:
        return
    # the found/not-found flag of the marker search is not inspected
    menu = CParsingHelper.getDataBeetwenMarkers(page, '<div class="menulist"', '<a href="tvonline">', True)[1]
    for link in re.compile('<a class="menuitemtv" href="([^"]+?)">([^<]+?)</a></div>').findall(menu):
        self.addDir({'name': 'category', 'title': link[1], 'url': self.MAI_URL + link[0]})
def listMovies(self, cItem):
    """List the movies of one listing or search-result page.

    ``cItem`` supplies ``'url'``, the optional ``'page'`` number (default
    1), optional ``'post_data'`` (used for the request when not empty) and
    ``'sort_type'``.  For pages after the first, ``/page/N/`` is appended to
    urls without a query string.  Every ``short-film`` block with a link
    and a title is added as a video, followed by a next-page directory when
    the last block contains the pager marker.
    """
    printDBG("Filmy3dEU.listMovies")

    page = cItem.get('page', 1)
    url = cItem['url']
    if page > 1 and '?' not in url:
        if url.endswith('/'):
            url = url[:-1]
        url += '/page/%s/' % page

    post_data = cItem.get('post_data', {})
    if {} == post_data:
        sts, data = self.cm.getPage(url, {}, self._addSortData(cItem.get('sort_type', '')))
    else:
        sts, data = self.cm.getPage(cItem['url'], {'raw_post_data':True, 'use_cookie': True, 'save_cookie': False, 'load_cookie': True, 'cookiefile': self.COOKIE_FILE}, post_data)
    if not sts:
        return

    data = CParsingHelper.getDataBeetwenMarkers(data, '<div class="short-film">', '<div class="gf-right">', False)[1]
    data = data.split('<div class="short-film">')
    # the pager, when present, trails the last movie block
    nextPage = '<span class="pnext">Poprzednia</span></a>' in data[-1]

    for item in data:
        tmp = item.split('<h5')[-1]
        url = self.cm.ph.getSearchGroups(tmp, 'href="([^"]+?)"')[0]
        icon = self.cm.ph.getSearchGroups(item, 'src="([^"]+?)"')[0]
        title = CParsingHelper.getDataBeetwenMarkers(tmp, '>', '</h5>', False)[1]
        if '' == title:
            # fall back to the image's alt text; the result has to be
            # stored, otherwise the item is dropped by the check below
            title = self.cm.ph.getSearchGroups(item, 'alt="([^"]+?)"')[0]
        desc = _("Rating") + ': {0}/100, '.format(self.cm.ph.getSearchGroups(item, 'width\:([0-9]+?)\%')[0])
        desc += CParsingHelper.getDataBeetwenMarkers(item, '<p class="text">', '</p>', False)[1]
        if '' != url and '' != title:
            params = dict(cItem)
            params.update( {'title':self.cleanHtmlStr( title ), 'url':self._getFullUrl(url), 'desc': self.cleanHtmlStr( desc ), 'icon':self._getFullUrl(icon)} )
            self.addVideo(params)

    if nextPage:
        params = dict(cItem)
        params.update( {'title':_('Next page'), 'page':page+1} )
        self.addDir(params)
def listSerialsAlphabeticallyMenu(self, category):
    """Add one directory per letter of the serials alphabet menu.

    The menu is read from ``self.SERIALS_URL``; relative links are
    prefixed with ``self.MAINURL``, absolute links are used as they are.
    ``category`` is stored as the entries' ``'category'``.
    """
    printDBG("listSerialsAlphabeticallyMenu")
    sts, data = self.cm.getPage( self.SERIALS_URL )
    if not sts:
        return
    data = CParsingHelper.getDataBeetwenMarkers(data, '<div class="content alphabet">', '</ul>', False)[1]
    letters = re.compile('<a href="([^"]+?)">([^<]+?)</a>').findall(data)
    for href, title in letters:
        # Resolve every link on its own: an absolute href must neither
        # leave the url undefined nor reuse the previous letter's url.
        if href.startswith('http'):
            letterUrl = href
        else:
            letterUrl = self.MAINURL + href
        params = {'name': 'category', 'title': title, 'category': category, 'url': letterUrl}
        self.addDir(params)
def listSerialEpisodes(self, url, icon, plot):
    """Add one video per episode found on a season page.

    ``url`` is the season page; ``icon`` and ``plot`` are copied into every
    entry.  Nothing is added when the download fails.
    """
    printDBG("listSerialEpisodes")
    sts, data = self.cm.getPage( url )
    if False == sts:
        return
    sts, data = CParsingHelper.getDataBeetwenMarkers(data, '<div class="section serial episodes">', '<script>', False)
    episodes = re.compile('<li><a href="[/]?(serial,[^,]+?,sezon,[1-9][0-9]*?,epizod,[0-9]+?.html)">([^<]+?)</a></li>').findall(data)
    for href, title in episodes:
        # Resolve every link on its own so an absolute href can never fall
        # back to the season url or to the previous episode's url.
        if href.startswith('http'):
            episodeUrl = href
        else:
            episodeUrl = self.MAINURL + href
        params = {'title':title, 'url':episodeUrl, 'icon':icon, 'plot':plot}
        self.addVideo(params)
def _listItemsTab2(self, cItem, category='video'):
    """List the items of one filtered listing page.

    The request url is built from ``cItem['url']`` plus the optional
    ``'cat'``, ``'ver'``, ``'year'`` and ``'page'`` filters.  Items are
    added as videos when ``category`` is ``'video'`` and as directories of
    ``category`` otherwise; a next-page directory follows when the page
    links to the next page number.

    NOTE(review): the loop below reads ``tmp[0]`` / ``tmp[1]`` but ``tmp``
    is never assigned in this function, so the first listed item raises
    NameError unless ``tmp`` exists at module level.  The lookup that
    should provide title and description is not visible here - confirm and
    restore it.
    """
    printDBG("AlltubeTV._listItemsTab >>>>>>> cItem[%r]" % cItem)
    url = cItem['url']
    # every active filter is appended as 'name[value]+'
    if '' != cItem.get('cat', ''): url += 'kategoria[%s]+' % cItem['cat']
    if '' != cItem.get('ver', ''): url += 'wersja[%s]+' % cItem['ver']
    if '' != cItem.get('year', ''): url += 'rok[%s]+' % cItem['year']
    page = cItem.get('page', 1)
    if page > 1: url += 'strona[%s]+' % page
    sts, data = self.cm.getPage(url)
    if not sts: return
    # a link to the following page number means there is a next page
    if ('strona[%s]+' % (page + 1)) in data: nextPage = True
    else: nextPage = False
    data = CParsingHelper.getDataBeetwenMarkers(data, 'name="filter-year">', '<div class="container">', False)[1]
    data = data.split('<div class="col-sm-6">')
    # drop the text in front of the first item column
    if len(data): del data[0]
    for item in data:
        url = self.cm.ph.getSearchGroups(item, 'href="([^"]+?)"')[0]
        icon = self.cm.ph.getSearchGroups(item, 'src="([^"]+?)"')[0]
        # NOTE(review): 'tmp' is undefined at this point (see docstring)
        title = tmp[0]
        desc = tmp[1]
        params = dict(cItem)
        params.update({ 'title': self.cleanHtmlStr(title), 'url': self._getFullUrl(url), 'desc': self.cleanHtmlStr(desc), 'icon': self._getFullUrl(icon) })
        if category != 'video':
            params['category'] = category
            self.addDir(params)
        else:
            self.addVideo(params)
    if nextPage:
        params = dict(cItem)
        params.update({'title': _('Next page'), 'page': page + 1})
        self.addDir(params)
def listEpisodes(self, cItem):
    """Add the episodes of the season selected by ``cItem['season']``.

    The season's table is located through its ``id`` attribute on the page
    at ``cItem['url']``.  Only episode rows containing the
    ``cross_add gray`` marker are considered; rows without a link or a
    title are skipped.  The description is the text after the first
    ``</b>`` of the row (empty when the row has none).
    """
    printDBG("SeansikTV.listEpisodes")
    url = self._getFullUrl(cItem['url'])
    sts, data = self.cm.getPage(url)
    if False == sts:
        return

    data = CParsingHelper.getDataBeetwenMarkers(data, 'id="%s"' % cItem['season'], '</table>', False)[1]
    marker = '<tr itemprop="episode"'
    data = data.split(marker)
    # drop the table header in front of the first episode row
    if len(data):
        del data[0]

    for item in data:
        if 'cross_add gray' not in item:
            continue
        url = self._getFullUrl(CParsingHelper.getSearchGroups(item, 'href="([^"]+?)"')[0])
        title = self.cleanHtmlStr(CParsingHelper.getDataBeetwenMarkers(item, '</td>', '</td>', False)[1])
        # a row without '</b>' must not abort the whole listing
        descParts = item.split('</b>')
        desc = self.cleanHtmlStr(descParts[1]) if len(descParts) > 1 else ''
        # validate data
        if '' == url or '' == title:
            continue
        params = dict(cItem)
        params.update({'title': title, 'url': url, 'desc': desc})
        self.addVideo(params)
def listsCategoriesMenu(self, url):
    """Add one directory per entry of the category selection list.

    The page at ``url`` is requested with ``self.HEADER``; every
    ``</li>``-separated fragment of the selection list becomes a directory
    whose title is the cleaned fragment text and whose category is the
    fragment's ``data-value``.
    """
    sts, page = self.cm.getPage(url, {'header': self.HEADER})
    if not sts:
        return
    selection = CParsingHelper.getDataBeetwenMarkers(page, 'movie-kat-selection">', '</ul>', False)[1]
    for fragment in selection.split('</li>'):
        label = self.cleanHtmlStr(fragment)
        catID = CParsingHelper.getSearchGroups(fragment, 'data-value="([^"]+?)"', 1)[0]
        self.addDir({'name': 'category', 'title': label, 'category': catID})
def listAllSeries(self, cItem, category):
    """Add one directory per series of the filter list.

    The list is read from the page at ``cItem['url']``.  Every entry is a
    copy of ``cItem`` with the cleaned fragment text as title, the
    fragment's link (made absolute) as url and ``category`` as category.
    """
    printDBG("KwejkTV.listAllSeries")
    sts, page = self.cm.getPage(cItem['url'])
    if not sts:
        return

    listing = CParsingHelper.getDataBeetwenMarkers(page, ' <ul class="filter">', '</ul>', False)[1]
    # the text after the last closing tag is not a series entry
    for fragment in listing.split('</li>')[:-1]:
        href = self.cm.ph.getSearchGroups(fragment, 'href="([^"]+?)"')[0]
        entry = dict(cItem)
        entry.update( {'title': self.cleanHtmlStr( fragment ), 'url':self._getFullUrl(href)} )
        entry['category'] = category
        self.addDir(entry)
def getLinks(self, url, lang, playerType):
    """Return the player links of one language version.

    :param url: page url; also sent as the ``Referer``.
    :param lang: dict with ``'val'`` (posted as ``'id'``) and ``'title'``.
    :param playerType: dict with ``'val'`` and ``'title'``; cookies are
        sent for every value but ``'free'``.
    :return: list of ``{'need_resolve', 'name', 'url'}`` dicts.  It is
        empty when the request fails or the response says the premium
        player is not available.  When no classic player entry is found,
        the link stored in the ``newPlayer.init`` key is tried instead.
    """
    printDBG("getLinks lang[%r], playerType[%r]" % (lang, playerType) )
    links = []

    headers = dict(self.AJAX_HEADER)
    headers['Referer'] = url
    if 'free' == playerType['val']:
        httpParams = {'header': headers}
    else:
        httpParams = {'header': headers, 'use_cookie': True, 'save_cookie': False, 'load_cookie': True, 'cookiefile': self.COOKIE_FILE}

    #post_data = { 'action': 'getPlayer', 'lang': lang['val'], 'playerType': playerType['val'] }
    postData = { 'action': 'getPlayer', 'id': lang['val'], 'setHosting': '1' }
    sts, data = self.cm.getPage( url, httpParams, postData)
    if not sts or 'Player premium jest dostępny tylko dla' in data:
        return links

    # get players ID
    # NOTE(review): the 'services' fragment extracted here is overwritten
    # right away - the regex runs on the whole response; confirm which one
    # is intended.
    playersData = CParsingHelper.getDataBeetwenMarkers(data, '<div class="services">', '</div>', False)[1]
    playersData = re.compile('data-id="([0-9]+?)" data-playertype="([^"]+?)"[^>]*?>([^<]+?)</a>').findall(data)
    for player in playersData:
        name = '%s|%s|%s' % (lang['title'].ljust(16), playerType['title'].center(12), player[2].strip().rjust(14))
        link = '%s|%s|%s' % (url, player[0], playerType['val'])
        links.append({'need_resolve':1, 'name': name, 'url': link })

    if 0 != len(links):
        return links

    # new method to get premium links
    sts, tmp = CParsingHelper.getDataBeetwenMarkers(data, 'newPlayer.init("', '")', False)
    try:
        tmp = CParsingHelper.getSearchGroups(data, 'id="%s" data-key="([^"]+?)"' % tmp)[0]
        tmp = base64.b64decode(tmp[2:])
        tmp = byteify( json.loads(tmp)['url'] )
        title = '%s | premium' % lang['title'].ljust(16)
        tmp = {'need_resolve':1, 'name': title, 'url': tmp}
        links.append(tmp)
        printDBG("||||||||||||||||||||||||||||||||||||%s|||||||||||||||||||||||||||||||" % tmp)
    except:
        printExc()
    return links
def listVideos(self, catUrl, page):
    """List the materials of one category page.

    The page is requested as ``catUrl + '/offset/<page>'``.  Every
    ``matlink`` anchor is passed to ``self.playVideo``; a
    'Następna strona' directory is added when the page links to the
    following offset.
    """
    printDBG("listVideos")
    sts, html = self.cm.getPage( catUrl + '/offset/%d' % page)
    if not sts:
        return

    followingPage = page + 1
    hasNext = ('offset/%d' % followingPage) in html

    # the found/not-found flag of the marker search is not inspected
    listing = CParsingHelper.getDataBeetwenMarkers(html, '<p class="material">', '</table>', True)[1]
    for link in re.compile('<a class="matlink" href="([^"]+?)">([^<]+?)</a').findall(listing):
        self.playVideo({'name': 'category', 'title': link[1], 'url': self.MAI_URL + link[0]})

    if hasNext:
        self.addDir({'name': 'category', 'page': followingPage, 'title': 'Następna strona', 'url': catUrl})
def listAllSeries(self, category):
    """Add one directory per series of the ``term-list`` list.

    Every entry is a copy of ``cItem`` with the cleaned fragment text as
    title, the fragment's link (made absolute) as url, an empty description
    and ``category`` as category.

    NOTE(review): ``cItem`` is read below but is neither a parameter nor
    assigned in this function, so the first statement raises NameError
    unless ``cItem`` exists at module level - confirm the intended
    signature.
    """
    sts, data = self.getPage(cItem['url'])
    if not sts: return
    data = CParsingHelper.getDataBeetwenMarkers(data, '<ul class="term-list">', '</ul>', False)[1]
    data = data.split('</li>')
    # drop the text after the last closing tag
    if len(data): del data[-1]
    for item in data:
        url = self.cm.ph.getSearchGroups(item, 'href="([^"]+?)"')[0]
        # no icon is extracted; the empty value is still passed through getFullUrl
        icon = ''
        params = dict(cItem)
        params.update( {'title': self.cleanHtmlStr( item ), 'url':self.getFullUrl(url), 'desc': '', 'icon':self.getFullUrl(icon)} )
        params['category'] = category
        self.addDir(params)
def getArticleContent(self, cItem):
    """Build the article description (title, text, image) for one item.

    Args:
        cItem: Item dict; ``'url'`` is fetched and ``'title'`` is the fallback
            when the page has no ``<h1>`` title.

    Returns:
        A one-element list holding ``title``, ``text``, ``images`` and
        ``other_info``, or an empty list when the page cannot be loaded or
        the expected section is missing.
    """
    printDBG("MoviesHDCO.getArticleContent [%s]" % cItem)
    retTab = []

    sts, page = self.cm.getPage(cItem['url'])
    if not sts:
        return retTab

    sts, section = CParsingHelper.getDataBeetwenMarkers(page, '<div class="movie_big relative">', '<div class="report">', False)
    if not sts:
        return retTab

    helper = self.cm.ph
    icon = helper.getSearchGroups(section, 'src="([^"]+?)"')[0]
    desc = helper.getDataBeetwenMarkers(section, '<div class="movie_desc">', '<div class="clear">', False)[1]
    title = self.cleanHtmlStr(helper.getDataBeetwenMarkers(section, '<h1>', '</h1>', False)[1])
    if title == '':
        title = cItem['title']

    article = {
        'title': title,
        'text': self.cleanHtmlStr(desc),
        'images': [{'title': '', 'url': self._getFullUrl(icon)}],
        'other_info': {},
    }
    return [article]
def listSerialEpisodes(self, url, icon, plot):
    """Add a video entry for every episode link found on a season page.

    Args:
        url: Address of the page listing the episodes.
        icon: Icon stored in every added entry.
        plot: Description stored in every added entry.
    """
    printDBG("listSerialEpisodes")
    sts, data = self.cm.getPage(url)
    if not sts:
        return

    sts, data = CParsingHelper.getDataBeetwenMarkers(
        data, '<div class="section serial episodes">', '<script>', False)
    episodes = re.compile(
        '<li><a href="[/]?(serial,[^,]+?,sezon,[1-9][0-9]*?,epizod,[0-9]+?.html)">([^<]+?)</a></li>'
    ).findall(data)
    for link, title in episodes:
        # Resolve the address per item, so an absolute link is used as-is
        # and never replaced by the page address or a previous episode's.
        episodeUrl = link if link.startswith('http') else self.MAINURL + link
        self.addVideo({'title': title, 'url': episodeUrl, 'icon': icon, 'plot': plot})
def listSerialSeasons(self, category, url, icon):
    """Add a directory entry for every season link found on a series page.

    The series description found on the page is stored as ``'plot'`` in every
    added entry.

    Args:
        category: Value stored under ``'category'`` in every added entry.
        url: Address of the series page.
        icon: Icon stored in every added entry.
    """
    printDBG("listSerialSeasons")
    sts, data = self.cm.getPage(url)
    if not sts:
        return

    plot = CParsingHelper.getDataBeetwenMarkers(
        data, '<p class="serialDescription">', '</p>', False)[1]
    data = CParsingHelper.getDataBeetwenMarkers(
        data, '<div class="seasonExpand">', '<script>', False)[1]
    seasons = re.compile(
        '<a href="[/]?(serial,[^,]+?,sezon,[1-9][0-9]*?.html)">([^<]+?)</a>'
    ).findall(data)
    for link, title in seasons:
        # Resolve the address per item, so an absolute link is used as-is
        # and never replaced by the page address or a previous season's.
        seasonUrl = link if link.startswith('http') else self.MAINURL + link
        self.addDir({
            'name': 'category',
            'title': title,
            'category': category,
            'url': seasonUrl,
            'icon': icon,
            'plot': plot,
        })
def getMoviesList(self, baseUrl, cat, page):
    """List the movies of one listing page plus an optional next-page entry.

    Args:
        baseUrl: Listing address; ``p=<page>`` is appended for pages other
            than ``'1'``.
        cat: Category stored in the next-page entry.
        page: Page number as a string.
    """
    printDBG("getMoviesList for url[%s], page[%s]" % (baseUrl, page))
    # The first page is requested without a page suffix.
    strPage = '' if page == '1' else 'p=' + page

    sts, html = self.cm.getPage( baseUrl + strPage )
    if not sts:
        printDBG("getMoviesList problem")
        return

    sts, html = CParsingHelper.getDataBeetwenMarkers(html, '<!-- Listing film', 'Regulamin', False)
    if not sts:
        printDBG("getMoviesList problem no data beetween markers")
        return

    chunks = html.split('movie-item')
    if len(chunks) > 1:
        del chunks[0]

    # The pagination marker is looked for in the last chunk only.
    hasNext = chunks[-1].find('rel="next"') != -1

    for chunk in chunks:
        # url & title are mandatory; skip chunks without them
        link = re.search('<a class="title" href="([^"]+?)">(.+?)</a>', chunk)
        if not link:
            continue
        url = self.MAINURL + link.group(1)
        title = link.group(2).replace('<small>', '').replace('</small>', '')

        poster = re.search('style="background-image:url\(([^)]+?)\);"', chunk)
        img = poster.group(1) if poster else ''

        description = re.search('<p class="desc">([^<]+?)</p>', chunk)
        plot = description.group(1) if description else ''

        self.addVideo({'title': title, 'url': url, 'icon': img, 'plot': plot})

    if hasNext:
        self.addDir({'name': 'category', 'category': cat, 'title': 'Następna strona', 'url': baseUrl, 'page': str(int(page) + 1)})
def getSearchResult(self, pattern, searchType, page, nextPageCategory, sortBy=''):
    """Run a search and return the parsed result items.

    A "next page" item is appended when results were parsed and the pager
    fragment of the page mentions the following page number.

    Returns:
        List of parsed items, or an empty list when the request fails or any
        exception is raised while processing.
    """
    printDBG(
        'YouTubeParser.getSearchResult pattern[%s], searchType[%s], page[%s]'
        % (pattern, searchType, page))
    currList = []
    try:
        url = 'http://www.youtube.com/results?search_query=%s&filters=%s&search_sort=%s&page=%s' % (
            pattern, searchType, sortBy, page)
        sts, data = self.cm.getPage(url, {'host': self.HOST})
        if not sts:
            return currList

        pager = self.cm.ph.getDataBeetwenMarkers(data, 'page-box', '</div>', False)[1]
        hasNext = pager.find('>%d<' % (int(page) + 1)) > -1

        sp = '<li><div class="yt-lockup'
        # Playlist searches use a different closing marker for the results.
        m2 = '<div class="branded-page-box' if searchType == 'playlist' else '</ol>'
        results = CParsingHelper.getDataBeetwenMarkers(data, sp, m2, False)[1]
        currList = self.parseListBase(results.split(sp), searchType)

        if len(currList) and hasNext:
            currList.append({
                'name': 'history',
                'type': 'category',
                'category': nextPageCategory,
                'pattern': pattern,
                'search_type': searchType,
                'title': _("Next page"),
                'page': str(int(page) + 1),
            })
    except Exception:
        printExc()
        return []
    return currList
def getSerialEpisodItems(self, url, episode, icon):
    """Add a video entry for every link inside the ``sezon_<episode>`` block.

    Args:
        url: Address of the page to load.
        episode: Value interpolated into the ``id="sezon_%s"`` start marker.
        icon: Icon stored in every added entry.
    """
    printDBG("getSerialEpisodItems start url=[%s] episode[%s]" % (url, episode) )
    sts, html = self.cm.getPage( url )
    if not sts:
        printDBG("getSerialEpisodItems problem")
        return

    sts, html = CParsingHelper.getDataBeetwenMarkers(html, 'id="sezon_%s"' % episode, '</div>', False)
    if not sts:
        printDBG("getSerialEpisodItems problem no data beetween markers")
        return

    links = re.compile('<a class="o" href="([^"]+?)">([^<]+?)</a>').findall(html)
    for href, label in links:
        self.addVideo({'name': 'category', 'title': label, 'url': self.MAINURL + href, 'icon': icon})
def listSerialsBack(self, category, marker1, marker2):
    """List the series found between two markers of the serials page.

    Args:
        category: Passed through to ``listItems``.
        marker1: Start marker of the fragment to parse.
        marker2: End marker of the fragment to parse.
    """
    sts, page = self.cm.getPage(self.SERIALS_URL)
    if not sts:
        return

    fragment = CParsingHelper.getDataBeetwenMarkers(page, marker1, marker2, False)[1]
    # Escaped single quotes become double quotes, then leftover backslashes go.
    fragment = fragment.replace("\\'", '"')
    fragment = fragment.replace('\\', '')
    chunks = fragment.split('<a onmouseover="toolTip(')

    def plotOf(item):
        # Description text sits between the thumbnail cell and the next <div.
        return CParsingHelper.getDataBeetwenMarkers(
            item, 'width="100"></td><td>', '<div', False)[1]

    self.listItems(chunks, category, None, plotOf)
def getTop100Cat(self, baseUrl, cat):
    """Add a directory entry for every option of the ``changecat`` form.

    Args:
        baseUrl: Page address; also stored as ``'url'`` in every entry.
        cat: Value stored under ``'category'`` in every entry.
    """
    printDBG("getTop100Cat for url[%s]" % baseUrl)
    sts, html = self.cm.getPage( baseUrl )
    if not sts:
        printDBG("getTop100Cat problem")
        return

    sts, html = CParsingHelper.getDataBeetwenMarkers(html, '<form method="POST" id="changecat">', '</select>', False)
    if not sts:
        printDBG("getTop100Cat problem no data beetween markers")
        return

    options = re.compile('<option value="([^"]+?)" >([^<]+?)</option>').findall(html)
    for value, label in options:
        self.addDir({'name': 'category', 'category': cat, 'title': label, 'url': baseUrl, 'mode': value})
def getVideosFromTraylist(self, url, category, page, cItem):
    """Return the tray-list videos by delegating to ``getVideosApiPlayList``.

    All four arguments are forwarded unchanged and the delegate's result is
    returned as-is.
    """
    printDBG('YouTubeParser.getVideosFromTraylist')
    # The former HTML-scraping code sat after this unconditional return and
    # could never execute, so it has been removed.
    return self.getVideosApiPlayList(url, category, page, cItem)
def prognozaPogodyList(self, url):
    """Add a video entry for every link inside the page's ``items`` block.

    The page is fetched through the configured proxy when the
    ``weather_useproxy`` setting is enabled.  Relative link and icon
    addresses are prefixed with the site root.
    """
    printDBG("prognozaPogodyList start")
    params = {}
    if config.plugins.iptvplayer.weather_useproxy.value:
        params = {'http_proxy': config.plugins.iptvplayer.proxyurl.value}

    sts, data = self.cm.getPage(url, params)
    if not sts:
        return

    data = CParsingHelper.getDataBeetwenMarkers(data, '<div id="items">', '</div>', False)[1]
    siteRoot = 'http://prognoza.pogody.tv/'
    # The piece after the last '</a>' is trailing markup, not an entry.
    for chunk in data.split('</a>')[:-1]:
        link = self.cm.ph.getSearchGroups(chunk, 'href="([^"]+?)"')[0]
        icon = self.cm.ph.getSearchGroups(chunk, 'src="([^"]+?)"')[0]
        title = self.cleanHtmlStr(chunk)
        if len(icon) and not icon.startswith('http'):
            icon = siteRoot + icon
        if len(link) and not link.startswith('http'):
            link = siteRoot + link
        self.addVideo({'name': "prognoza.pogody.tv", 'url': link, 'icon': icon, 'title': title})
def getArticleContent(self, cItem):
    """Build the article description from the page's Open Graph meta tags.

    Only items whose ``'mode'`` is ``'movie'`` or whose ``'category'`` is
    ``'explore_item'`` are handled.

    Returns:
        A one-element list holding ``title``, ``text``, ``images`` and
        ``other_info``, or an empty list for other items, when the page
        cannot be loaded, or when the meta section is missing.
    """
    printDBG("XrysoiSE.getArticleContent [%s]" % cItem)
    retTab = []

    supported = 'movie' == cItem.get('mode') or 'explore_item' == cItem.get('category')
    if not supported:
        return retTab

    sts, page = self.cm.getPage(cItem['url'])
    if not sts:
        return retTab

    sts, meta = CParsingHelper.getDataBeetwenMarkers(page, '<meta property', '<script')
    if not sts:
        return retTab

    find = self.cm.ph.getSearchGroups
    icon = find(meta, '<meta[^>]*?property="og:image"[^>]*?content="(http[^"]+?)"')[0]
    title = find(meta, '<meta[^>]*?property="og:title"[^>]*?content="([^"]+?)"')[0]
    desc = find(meta, '<meta[^>]*?property="og:description"[^>]*?content="([^"]+?)"')[0]

    article = {
        'title': self.cleanHtmlStr(title),
        'text': self.cleanHtmlStr(desc),
        'images': [{'title': '', 'url': self._getFullUrl(icon)}],
        'other_info': {},
    }
    return [article]
def getVideosFromPlaylist(self, url, category, page):
    """Return the parsed video items of a playlist page.

    Args:
        url: Address of the playlist page.
        category: Not used by this method.
        page: Not used by this method.

    Returns:
        The result of ``parseListBase`` for the playlist fragment, or an
        empty list when the page cannot be loaded.
    """
    printDBG('YouTubeParser.getVideosFromPlaylist')
    # The previous "if 1: ... else:" scaffold (a disabled try/except) made
    # the else branch unreachable; only the live path is kept.
    sts, data = self.cm.getPage(url, {'host': self.HOST})
    if not sts:
        return []

    sts, data = CParsingHelper.getDataBeetwenMarkers(
        data, 'gh-activityfeed', 'footer-container', False)
    return self.parseListBase(data.split('playlist-video-item-base-content'))
def getTop100(self, baseUrl, mode):
    """Add a numbered video entry for every item of a top-100 ranking.

    Args:
        baseUrl: Address the ranking is requested from.
        mode: Value posted as the ``kategoria`` form field.
    """
    printDBG("getTop100 for url[%s]" % baseUrl)
    sts, html = self.cm.getPage(baseUrl, {}, {'kategoria': mode})
    if not sts:
        printDBG("getTop100 problem")
        return

    sts, html = CParsingHelper.getDataBeetwenMarkers(
        html, '<div class="ew-top-100"', '</section></div>', True)
    if not sts:
        printDBG("getTop100 problem no data beetween markers")
        return

    entries = html.split('<div class="ew-top-100"')
    if len(entries) > 1:
        del entries[0]

    rank = 0
    for entry in entries:
        # url & title are mandatory; entries without them do not get a rank
        names = re.search(
            '<a href="([^"]+?)" class="en">([^<]+?)</a>[^<]*?<a href="[^"]+?" class="pl">([^<]*?)</a>',
            entry)
        if not names:
            continue
        rank += 1
        url = self.MAINURL + names.group(1)
        title = str(rank) + '. ' + names.group(2) + ' / ' + names.group(3)

        image = re.search('<img src="([^"]+?)"', entry)
        img = image.group(1) if image else ''

        paragraph = re.search('<p[^>]*?>([^<]+?)</p>', entry)
        plot = paragraph.group(1).strip() if paragraph else ''

        self.addVideo({'title': title, 'url': url, 'icon': img, 'plot': plot})
def getHostingTable(self, urlItem):
    """Return the hosting entries for an item, using a one-entry cache.

    The cache is keyed by ``urlItem['url']`` plus the optional ``'ver'``
    value.  When ``'ver'`` matches one of the language ids on the page, only
    that language is queried; otherwise all listed languages are.

    Returns:
        List of hosting entries (empty when the page cannot be loaded).
    """
    printDBG("getHostingTable url[%s]" % urlItem['url'])
    version = urlItem.get('ver', '')

    # use cache if possible
    cache = self.linksCacheCache
    if len(cache.get('tab', [])) > 0 and (urlItem['url'] + version) == cache.get('marker', None):
        return cache['tab']

    hostingTab = []
    sts, page = self.cm.getPage(urlItem['url'])
    if not sts:
        return hostingTab

    # get lang tab
    langTab = []
    langsHtml = CParsingHelper.getDataBeetwenMarkers(page, '<div class="langs">', '</div>', False)[1]
    found = re.compile('data-id="([^"]+?)"[^>]*?>(.+?)</a>', re.DOTALL).findall(langsHtml)
    for langId, langLabel in found:
        entry = {'val': langId, 'title': self.cleanHtmlStr(langLabel)}
        if entry['val'] == version:
            # The requested version replaces everything collected so far.
            langTab = [entry]
            break
        langTab.append(entry)

    premium = {'val': 'premium', 'title': 'Premium'}
    free = {'val': 'free', 'title': 'Free'}
    for lang in langTab:
        links = []
        if self.loggedIn:
            links = self.getLinks(urlItem['url'], lang, dict(premium))
        if len(links) == 0:
            # Fall back to the free player when premium yields nothing.
            links = self.getLinks(urlItem['url'], lang, dict(free))
        hostingTab.extend(links)

    self.linksCacheCache = {
        'marker': urlItem['url'] + urlItem.get('ver', ''),
        'tab': hostingTab,
    }
    return hostingTab
def listGenres(self, cItem, category):
    """List the genres of the ``listing-cat`` block, preceded by an "any" entry.

    Every entry's address ends with a ``{page}``/``{sort_by}`` template.

    Args:
        cItem: Item dict; ``'url'`` is fetched and the dict is the base for
            the listed entries.
        category: Value stored under ``'category'`` in the base item.
    """
    printDBG("MoviesHDCO.listMoviesGenres")
    pageSuffix = '/page/{page}?display=tube&filtre={sort_by}'
    tmpList = [{'title': _("***Any***"), 'url': self.MAIN_URL + pageSuffix}]

    sts, html = self.getPage(cItem['url'])
    if not sts:
        return

    html = CParsingHelper.getDataBeetwenMarkers(html, '<ul class="listing-cat">', '</ul>', False)[1]
    # The piece after the last '</li>' is trailing markup, not an entry.
    for chunk in html.split('</li>')[:-1]:
        url = self.cm.ph.getSearchGroups(chunk, 'href="([^"]+?)"')[0]
        icon = self.cm.ph.getSearchGroups(chunk, 'src="([^"]+?)"')[0]
        title = self.cleanHtmlStr(chunk)
        tmpList.append({'title': title,
                        'icon': self._getFullUrl(icon),
                        'url': self._getFullUrl(url) + pageSuffix})

    mainItem = dict(cItem)
    mainItem['category'] = category
    self.listsTab(tmpList, mainItem)
def getVideosFromTraylist(self, url):
    """Return the parsed video items of a playlist tray.

    Args:
        url: Address of the page containing the tray.

    Returns:
        The result of ``parseListBase`` for the tray fragment, or an empty
        list when the page cannot be loaded.
    """
    printDBG('YouTubeParser.getVideosFromTraylist')
    # The previous "if 1: ... else:" scaffold (a disabled try/except) made
    # the else branch unreachable; only the live path is kept.
    sts, data = self.cm.getPage(url, {'host': self.HOST})
    if not sts:
        return []

    sts, data = CParsingHelper.getDataBeetwenMarkers(
        data, 'playlist-tray-container', 'playlist-tray-trim', False)
    return self.parseListBase(data.split('video-list-item'))
def listSerialsAlphabeticallyMenu(self, category):
    """Add a directory entry for every link of the alphabet block.

    Args:
        category: Value stored under ``'category'`` in every added entry.
    """
    printDBG("listSerialsAlphabeticallyMenu")
    sts, data = self.cm.getPage(self.SERIALS_URL)
    if not sts:
        return

    data = CParsingHelper.getDataBeetwenMarkers(
        data, '<div class="content alphabet">', '</ul>', False)[1]
    links = re.compile('<a href="([^"]+?)">([^<]+?)</a>').findall(data)
    for link, title in links:
        # Previously an absolute link left 'url' unassigned (or holding the
        # preceding entry's address); resolve it for every item instead.
        url = link if link.startswith('http') else self.MAINURL + link
        self.addDir({
            'name': 'category',
            'title': title,
            'category': category,
            'url': url,
        })
def fillSeriesCache(self, url):
    """Rebuild ``seriesCache`` and ``seriesLetters`` from the series list page.

    ``seriesCache`` maps each ``data-letter`` value to its list of series
    entries; ``seriesLetters`` keeps one entry per letter in order of first
    appearance, titled ``"<letter> [<count>]"``.  Both are reset before the
    page is requested.
    """
    printDBG("AlltubeTV.fillSeriesCache")
    self.seriesCache = {}
    self.seriesLetters = []

    sts, html = self.getPage(url)
    if not sts:
        return

    html = CParsingHelper.getDataBeetwenMarkers(html, 'term-list clearfix">', '</ul>', False)[1]
    rows = re.compile('<li[^>]*?data-letter="([^"]+)"[^>]*?>[^<]*?<a[^>]*?href="([^"]+?)"[^>]*?>([^<]+?)<').findall(html)
    for letter, href, name in rows:
        if letter not in self.seriesCache:
            self.seriesCache[letter] = []
            self.seriesLetters.append({'title': letter, 'letter': letter})
        self.seriesCache[letter].append({'good_for_fav': True,
                                         'title': self.cleanHtmlStr(name),
                                         'url': self.getFullUrl(href)})

    # Counts are only known once every row has been grouped.
    for entry in self.seriesLetters:
        key = entry['letter']
        entry['title'] = key + ' [%d]' % len(self.seriesCache[key])
def getEpisodesTab(self, url, serial, sezon, icon):
    """Add a video entry for every episode link inside the ``id=<sezon>`` block.

    Args:
        url: Address of the page to load.
        serial: Stored as ``'tvshowtitle'`` in every added entry.
        sezon: Element id of the season block; also stored as ``'season'``.
        icon: Icon stored in every added entry.
    """
    printDBG("getSerialEpisodItems start url=[%s] episode[%s]" % (url, sezon) )
    sts, data = self.cm.getPage( url )
    if not sts:
        printDBG("getSerialEpisodItems problem")
        return

    sts, data = CParsingHelper.getDataBeetwenMarkers(data, 'id="%s"' % sezon, '</div>', False)
    if not sts:
        printDBG("getSerialEpisodItems problem no data beetween markers")
        return

    episodes = re.compile('<a class="o" href="([^"]+?)/([^"]+?)/([^"]+?)">([^<]+?)</a>').findall(data)
    for first, second, third, rawTitle in episodes:
        # The link was captured as three path segments; the middle one also
        # serves as the 'episode' value.
        page = self.MAINURL + first + '/' + second + '/' + third
        title = self.cm.html_entity_decode(rawTitle)
        self.addVideo({'season': sezon, 'tvshowtitle': serial, 'episode': second,
                       'title': title, 'page': page, 'icon': icon})
def getLink(self, url):
    """Resolve a ``page|id|playerType`` triple into a playable address.

    Args:
        url: Either a plain address, returned unchanged, or three fields
            joined with ``|``.

    Returns:
        The resolved address.  ``''`` is returned when a request fails; the
        search results may also be empty.
    """
    printDBG("getLink url[%s]" % url)
    parts = url.split('|')
    if len(parts) != 3:
        return url

    pageUrl, playerId, playerType = parts
    isFree = 'free' == playerType

    post_data = {
        'action': 'getPlayer',
        'id': playerId,
        'playerType': playerType
    }
    HEADER = dict(self.AJAX_HEADER)
    HEADER['Referer'] = pageUrl
    http_params = {'header': HEADER}
    if not isFree:
        # Every non-free player type is requested with the saved cookies.
        http_params.update({
            'use_cookie': True,
            'save_cookie': False,
            'load_cookie': True,
            'cookiefile': self.COOKIE_FILE
        })

    sts, data = self.cm.getPage(pageUrl, http_params, post_data)
    if not sts:
        return ''

    data = CParsingHelper.getDataBeetwenMarkers(
        data, '<div class="player">', '<div class="playerTypes">', False)[1]

    if not isFree:
        return CParsingHelper.getSearchGroups(
            data, 'url: [\'"](http[^\'"]+?)[\'"]')[0]

    # Free players are wrapped in two nested iframes.
    outerFrame = CParsingHelper.getSearchGroups(
        data, '<iframe [^>]*?src="([^"]+?)"')[0]
    sts, data = self.cm.getPage(outerFrame)
    if not sts:
        return ''
    innerFrame = CParsingHelper.getSearchGroups(
        data, '<iframe [^>]*?src="([^"]+?)"')[0]
    return self.up.getVideoLink(innerFrame)
def getSearchResult(self, pattern, searchType, page, nextPageCategory, sortBy=''):
    """Run a search and return the parsed result items.

    A "next page" item is appended when the page contains a
    ``data-page`` attribute for the following page number.

    Returns:
        List of parsed items, or an empty list when the request fails or an
        exception is raised while processing.
    """
    printDBG(
        'YouTubeParser.getSearchResult pattern[%s], searchType[%s], page[%s]'
        % (pattern, searchType, page))
    currList = []
    try:
        url = 'http://www.youtube.com/results?search_query=%s&filters=%s&search_sort=%s&page=%s' % (
            pattern, searchType, sortBy, page)
        sts, data = self.cm.getPage(url, {'host': self.HOST})
        if sts:
            nextPage = data.find('data-page="%d"' % (int(page) + 1)) > -1

            marker = '<li><div class="yt-lockup'
            sts, data = CParsingHelper.getDataBeetwenMarkers(
                data, marker, '</ol>', False)
            currList = self.parseListBase(data.split(marker), searchType)
            if nextPage:
                currList.append({
                    'name': 'history',
                    'type': 'category',
                    'category': nextPageCategory,
                    'pattern': pattern,
                    'search_type': searchType,
                    'title': _("Next page"),
                    'page': str(int(page) + 1),
                })
    except Exception:
        # Narrowed from a bare "except:" so KeyboardInterrupt and SystemExit
        # are no longer swallowed.
        printExc()
        return []
    return currList
def getSerialsListByLetter(self, baseUrl, cat, letter):
    """Add a directory entry for every series listed under one letter.

    Args:
        baseUrl: Address of the page to load.
        cat: Value stored under ``'category'`` in every added entry.
        letter: Letter label; spaces are removed before it is interpolated
            into the ``id="letter_%s">`` start marker.
    """
    letter = letter.replace(' ', '')
    printDBG("getSerialsListByLetter start letter=%s" % letter)

    sts, html = self.cm.getPage( baseUrl )
    if not sts:
        printDBG("getSerialsListByLetter problem with getPage[%s]" % baseUrl)
        return

    sts, html = CParsingHelper.getDataBeetwenMarkers(html, 'id="letter_%s">' % letter, '</ul>', False)
    if not sts:
        printDBG("getSerialsListByLetter problem no data beetween markers")
        return

    links = re.compile('<a href="([^"]+?)" class="pl-corners">(.+?)</a>').findall(html)
    for href, label in links:
        title = remove_html_markup(label)
        url = self.MAINURL + href.strip()
        self.addDir({'name': 'category', 'category': cat, 'title': title, 'url': url})
def getGoldvodList(self, url):
    """Add a playable entry for every channel of the ``liveTV-channels`` list.

    Chunks missing a link, an image or a title are logged and skipped.

    Args:
        url: Address of the channel page; also stored as ``'desc'`` in every
            added entry.
    """
    printDBG('getGoldvodList entry url[%s]' % url)
    sts, data = self.cm.getPage(url)
    if not sts:
        return

    sts, data = CParsingHelper.getDataBeetwenMarkers(
        data, 'id="liveTV-channels">', '</nav>', False)
    for item in data.split('<li>'):
        printDBG("item [%r]" % item)
        try:
            # re.search returns None for a missing attribute, so .group()
            # raises and the chunk is skipped by the handler below.
            params = {}
            params['url'] = "http://goldvod.tv" + re.search(
                'href="([^"]+?)"', item).group(1)
            params['icon'] = re.search('src="([^"]+?)"', item).group(1)
            params['title'] = re.search('title="([^"]+?)"', item).group(1)
            params['desc'] = url
            self.playVideo(params)
        except Exception:
            # Narrowed from a bare "except:" so KeyboardInterrupt and
            # SystemExit are no longer swallowed per item.
            printExc()
def listFilms(self, cItem, page):
    """List the films of one listing page via ``listItems``.

    Args:
        cItem: Item dict supplying ``'cat'``, ``'sort'`` and ``'ver'`` for the
            page address; it is also the base of the next-page item.
        page: Page number (string or number convertible with ``int``).
    """
    printDBG("listFilms cItem[%r], page[%s]" % (cItem, page))
    address = self.MAINURL + '%s,%s,%s,%s.html' % (cItem['cat'], cItem['sort'], cItem['ver'], page)
    sts, html = self.cm.getPage(address)
    if not sts:
        return

    nextPageItem = None
    if '.html">></a>' in html:
        # A ">" pager link means another page follows.
        following = str(int(page) + 1)
        nextPageItem = dict(cItem)
        nextPageItem['page'] = following
        nextPageItem['title'] = 'Następna strona'

    sts, html = CParsingHelper.getDataBeetwenMarkers(
        html, '<ul class="moviesList">', '</ul>', False)
    chunks = html.split('<li data-url=')
    self.listItems(chunks, 'video', nextPageItem, None, True, cItem.get('ver', ''))
def getFilmTab(self, url, category, pager):
    """List the films of one page plus an optional next-page entry.

    Args:
        url: Address of the page to load.
        category: Value stored under ``'category'`` in the next-page entry.
        pager: Current page number (convertible with ``int``).
    """
    sts, html = self.cm.getPage( url, {'header': self.HEADER } )
    if not sts:
        return

    # Looked up on the full page, before it is cut down to the film list.
    nextPage = re.search('<li><a href="/filmy?.+?" rel="next">»</a></li>', html)

    itemMarker = '<div class="row-fluid movie-item">'
    html = CParsingHelper.getDataBeetwenMarkers(html, itemMarker, '<div class="container">', False)[1]

    titleA = re.compile('<a class="title"[^>]+?>')
    titleB = re.compile('</small>')
    plotA = re.compile('<p class="desc">')
    plotB = re.compile('</div>')

    for chunk in html.split(itemMarker):
        title = CParsingHelper.getDataBeetwenReMarkers(chunk, titleA, titleB, False)[1]
        page = self.MAINURL + CParsingHelper.getSearchGroups(chunk, 'class="title" href="([^"]+?)"', 1)[0]
        plot = CParsingHelper.getDataBeetwenReMarkers(chunk, plotA, plotB, False)[1]
        img = CParsingHelper.getSearchGroups(chunk, 'src="([^"]+?)"', 1)[0]
        if title != '' and page != '':
            self.addVideo({'title': title, 'page': page, 'icon': img, 'plot': plot})

    if nextPage:
        self.addDir({'name': 'nextpage', 'category': category, 'title': 'Następna strona', 'page': str(int(pager) + 1)})