def get_movie_links(self, url): print 'Get movie links: {url}'.format(url=url) data = util.get_remote_data(url) product = SoupStrainer( 'a', href=re.compile("^http\:\/\/www\.power4link\.us")) soup = BeautifulStoneSoup(data, parseOnlyThese=product, convertEntities=BeautifulSoup.XML_ENTITIES) items = [] pk_regex = re.compile('.*\/.*-(.*)\/') for a in soup: if a.text.startswith('Play'): link = a['href'].encode('utf-8', 'ignore') match = pk_regex.search(link) if match: label = match.group(1) pk = label items.append({ 'label': label, 'url': link, 'pk': pk, 'is_playable': True }) return items
def get_next_link(self, url): ''' Get next page link ''' print 'Get next page link: {url}'.format(url=url) data = util.get_remote_data(url) # Get list of movie titles product = SoupStrainer('div', {'class': 'wp-pagenavi'}) soup = BeautifulStoneSoup(data, parseOnlyThese=product, convertEntities=BeautifulSoup.XML_ENTITIES) current_item = soup.find('span', {'class': 'current'}) if current_item: next_item = current_item.findNextSibling() item = { 'label': '[B]Next >> [/B]', 'url': next_item['href'], 'pk': next_item.text } return item return None
def get_menu_movies(self, url): ''' Get movie titles for category ''' print 'Get list movies: {url}'.format(url=url) data = util.get_remote_data(url) # Get list of movie titles product = SoupStrainer('div', {'class': 'entry clearfix'}) soup = BeautifulStoneSoup(data, parseOnlyThese=product, convertEntities=BeautifulSoup.XML_ENTITIES) items = [] pk_regex = re.compile('\/([\w\-]+)\/') for item in soup: link = item.a['href'].encode('utf-8', 'ignore') thumb = item.a.img['src'].encode('utf-8', 'ignore') info = item.p.text pk = pk_regex.search(item.a['href']).group(1) items.append({ 'label': item.text, 'url': link, 'thumb': thumb, 'info': info, 'pk': pk, 'is_playable': True }) return items
def get_menu_category(self): ''' Get main list of categories ''' print 'Get list categories' url = self.MAIN_URL data = util.get_remote_data(url) product = SoupStrainer('div', {'class': 'menu-secondary-container'}) soup = BeautifulStoneSoup(data, parseOnlyThese=product, convertEntities=BeautifulSoup.XML_ENTITIES) items = [] for item in soup.findAll('li'): link = item.a['href'].encode('utf-8', 'ignore') pk = item['id'] # ignore invalid links if not 'category/' in link: continue items.append({ 'label': item.text, 'url': link, 'pk': pk, }) return items
def get_movie_links(self, url): print 'Get movie links: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulSoup(data, 'html.parser', parse_only=self.SoupStrainer_Movie_Link ) items = [] for item in soup.findAll('a', href=True): lower = item.text.lower() if 'in' in lower and 'now' in lower: label = util.encode(item.text) link = util.encode(item['href']) pk = label items.append({ 'label': label, 'url': link, 'pk': pk, 'is_playable': True }) return items
def get_next_link(self, url): ''' Get next page link ''' print 'Get next page link: {url}'.format(url=url) data = util.get_remote_data(url) # Get list of movie titles product = SoupStrainer('div', {'class': 'wp-pagenavi'}) soup = BeautifulStoneSoup(data, parseOnlyThese=product, convertEntities=BeautifulSoup.XML_ENTITIES) current_item = soup.find('span', {'class': 'current'}) if current_item: next_item = current_item.findNextSibling() if next_item: item = { 'label': '[B]Next >> [/B]', 'url': next_item['href'], 'pk': next_item.text } return item return None
def browse_frame(self, frameid, url):
    '''List thread links inside the frame's module container.

    Returns (items sorted by label, containstype for the frame).
    '''
    data = util.get_remote_data(url)
    soup = BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES)
    frame = self.frames[int(frameid)]
    moduleid = frame['moduleid']
    containstype = frame['containstype']
    unescaper = HTMLParser.HTMLParser()
    items = []
    for anchor in soup.find('div', id=moduleid).findAll('a'):
        tid = self.get_sub_id(anchor['href'])
        # Only anchors yielding a thread id are navigable entries.
        if tid:
            items.append({
                'label': unescaper.unescape(anchor.text),
                'url': self.base_url + self.thread_url_template + tid,
                'pk': tid
            })
    return sorted(items, key=lambda entry: entry['label']), containstype
def get_show_menu(self, language, base_url): ''' Get list of shows for selected season''' url = self.get_show_config_file(language) print 'Get show menu: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulStoneSoup(data, convertEntities=BeautifulSoup.XML_ENTITIES) items = [] for item in soup.menu_item_sub.findAll('sub_item'): t = item['text'] l = item['url'] pk = item['id'] r = re.compile('\d+').findall(t) if r: pk = r[0] lnk = '{base}/{page}'.format( base=base_url, page=l) items.append({ 'label': t, 'url': lnk, 'pk': pk }) return items
def get_next_link(self, url): ''' Get next page link ''' print 'Get next page link: {url}'.format(url=url) data = util.get_remote_data(url) # Get list of movie titles soup = BeautifulSoup(data, 'html.parser', parse_only=self.SoupStrainer_Next_Link) if soup and soup.a: link = soup.a['href'] page = link.rstrip('/').rsplit('/', 1)[1] item = { 'label': '[B]Next >> [/B]', 'thumb': '', 'info': '', 'url': link, 'pk': page, 'is_playable': False } return item return None
def browse_frame(self, frameid, url):
    '''Collect section links from the page; returns (items, containstype).'''
    #print '{name} - fetching {url}'.format(name=self.short_name, url=url)
    data = util.get_remote_data(url)
    soup = BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES)
    containstype = self.frames[int(frameid)]['containstype']
    unescaper = HTMLParser.HTMLParser()
    items = []
    for anchor in soup.findAll('a'):
        fid = self.get_sub_id(anchor['href'])
        # Skip anchors that do not resolve to a section id.
        if fid:
            items.append({
                'label': unescaper.unescape(anchor.text),
                'url': self.base_url + self.section_url_template + fid,
                'pk': fid
            })
    return sorted(items, key=lambda entry: entry['label']), containstype
def get_season_menu(self, siteid, language): ''' Get list of seasons for selected country''' url = self.get_season_config_file(language) print 'Get season menu: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulStoneSoup(data, convertEntities=BeautifulSoup.XML_ENTITIES) items = [] if soup.seasons is None: items.append({ 'label': 'Season {default_season}'.format( default_season=self.default_season), 'url': '{base}{lang}season{season}'.format( base=self.base_url, lang=language, season=self.default_season), 'pk': self.default_season }) else: for item in soup.seasons.findAll('season'): t = item['text'] r = re.compile('\d+').findall(t) pk = r[0] url = '{base}{lang}season{season}'.format( base=self.base_url, lang=language, season=pk) items.append({ 'label': t, 'url': url, 'pk': pk }) return items
def get_movie_links(self, url): print 'Get movie links: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulSoup(data, 'html.parser', parse_only=self.SoupStrainer_Movie_Link) items = [] pk_regex = re.compile('http://([\w\.]+)\/(?:([\w-]+)\/|)') for a in soup: if ('Full' in a.text or \ 'Play' in a.text) and \ 'Online' in a.text: link = util.encode(a['href']) match = pk_regex.search(link) if match: group1 = match.group(1) group2 = match.group(2) label = group2 if group2 else group1 pk = label items.append({ 'label': label, 'url': link, 'pk': pk, 'is_playable': True }) return items
def get_movie_links(self, url): print 'Get movie links: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulSoup(data, 'html.parser', parse_only=self.SoupStrainer_Movie_Link ) items = [] pk_regex = re.compile('http://([\w\.]+)\/(?:([\w-]+)\/|)') for a in soup: if ('Full' in a.text or \ 'Play' in a.text) and \ 'Online' in a.text: link = util.encode(a['href']) match = pk_regex.search(link) if match: group1 = match.group(1) group2 = match.group(2) label = group2 if group2 else group1 pk = label items.append({ 'label': label, 'url': link, 'pk': pk, 'is_playable': True }) return items
def get_menu_category(self, api): ''' Get main list of categories ''' print 'Get list categories' url = self.BASE_URL data = util.get_remote_data(url) soup = BeautifulSoup(data, 'html.parser', parse_only=self.SoupStrainer_Category) items = [] for item in soup.findAll('li'): if item.a.has_attr('href'): link = util.encode(item.a['href']) pk = item['id'] # ignore invalid links if 'category/' not in link: continue items.append({ 'label': item.a.text, 'url': link, 'pk': pk, }) return items
def get_movie_links(self, url): print 'Get movie links: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulSoup(data, 'html.parser', parse_only=self.SoupStrainer_Movie_Link ) items = [] for item in soup.findAll('p'): if item.strong and item.a: href = item.a.get('href', None) if href: label = util.encode(item.strong.text) link = util.encode(href) pk = label items.append({ 'label': label, 'url': link, 'pk': pk, 'is_playable': True }) return items
def get_movie_links(self, url): print 'Get movie links: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulSoup(data, 'html.parser', parse_only=self.SoupStrainer_Movie_Link) items = [] for item in soup.findAll('p'): if item.strong and item.a: href = item.a.get('href', None) if href: label = util.encode(item.strong.text) link = util.encode(href) pk = label items.append({ 'label': label, 'url': link, 'pk': pk, 'is_playable': True }) return items
def get_menu_category(self): ''' Get main list of categories ''' print 'Get list categories' url = self.MAIN_URL data = util.get_remote_data(url) product = SoupStrainer('div', {'class': 'menu-secondary-container'}) soup = BeautifulStoneSoup(data, parseOnlyThese=product, convertEntities=BeautifulSoup.XML_ENTITIES) items = [] for item in soup.findAll('li'): link = item.a['href'].encode('utf-8', 'ignore') pk = item['id'] items.append({ 'label': item.text, 'url': link, 'pk': pk, }) return items
def get_next_link(self, url): ''' Get next page link ''' print 'Get next page link: {url}'.format(url=url) data = util.get_remote_data(url, False) # Get list of movie titles soup = BeautifulSoup(data, 'html.parser', parse_only=self.SoupStrainer_Next_Link ) if soup and soup.a: link = soup.a['href'] page = link.rstrip('/').rsplit('/', 1)[1] item = { 'label': '[B]Next >> [/B]', 'thumb': '', 'info': '', 'url': link, 'pk': page, 'is_playable': False } return item return None
def get_movie_links(self, url): print 'Get movie links: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulSoup(data, 'html.parser', parse_only=self.SoupStrainer_Movie_Link) items = [] for item in soup.findAll('a', href=True): lower = item.text.lower() if 'in' in lower and 'now' in lower: label = util.encode(item.text) link = util.encode(item['href']) pk = label items.append({ 'label': label, 'url': link, 'pk': pk, 'is_playable': True }) return items
def get_menu_category(self, api): ''' Get main list of categories ''' print 'Get list categories' url = self.BASE_URL data = util.get_remote_data(url) soup = BeautifulSoup(data, 'html.parser', parse_only=self.SoupStrainer_Category ) items = [] pk_regex = re.compile('\/([\w\-]+)\/') for item in soup.findAll('li'): if item.a.has_attr('href'): link = util.encode(item.a['href']) # ignore invalid links if 'category/' not in link: continue pk = pk_regex.search(item.a['href']).group(1) items.append({ 'label': item.a.text, 'url': link, 'pk': pk, }) return items
def get_menu_movies(self, url): ''' Get movie titles for category ''' print 'Get list movies: {url}'.format(url=url) data = util.get_remote_data(url) # Get list of movie titles product = SoupStrainer('div', {'class': 'entry clearfix'}) soup = BeautifulStoneSoup(data, parseOnlyThese=product, convertEntities=BeautifulSoup.XML_ENTITIES) items = [] pk_regex = re.compile('\/([\w\-]+)\/') for item in soup: link = item.a['href'].encode('utf-8', 'ignore') thumb = item.a.img['src'].encode('utf-8', 'ignore') info = item.p.text pk = pk_regex.search(item.a['href']).group(1) items.append({ 'label': item.text, 'url': link, 'thumb': thumb, 'info': info, 'pk': pk, 'is_playable': False }) return items
def get_movie_links(self, url): print 'Get movie links: {url}'.format(url=url) data = util.get_remote_data(url) product = SoupStrainer('a', href=re.compile("^http\:\/\/www\.power4link\.us")) soup = BeautifulStoneSoup(data, parseOnlyThese=product, convertEntities=BeautifulSoup.XML_ENTITIES) items = [] pk_regex = re.compile('.*\/.*-(.*)\/') for a in soup: if a.text.startswith('Play'): link = a['href'].encode('utf-8', 'ignore') match = pk_regex.search(link) if match: label = match.group(1) pk = label items.append({ 'label': label, 'url': link, 'pk': pk, 'is_playable': True }) return items
def get_show_menu(self, channelid): ''' Get shows for specified channel''' url = '{base}{section}{pk}{style}'.format( base=self.base_url, section=self.section_url_template, pk=channelid, style=self.mobile_style) print 'Get show menu: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES) channels = [] shows = [] try: sub = soup.find('ul', attrs={ 'data-role': 'listview', 'data-theme': 'd', 'class': 'forumbits' }) h = sub.findAll('li') linklist = self.get_parents(h) if linklist and len(linklist) > 0: for l in linklist: tagline = HTMLParser.HTMLParser().unescape(l.a.text) link = self.base_url + l.a['href'] fid = self.get_sub_id(link) data = { 'label': tagline, 'url': link, 'pk': fid, } if (l.get('data-has-children')): channels.append(data) else: shows.append(data) except: pass # This forum has a number of uncategorized threads. # Display uncategorized episode threads under Uncategorized container = soup.find('ul', id='threads') if container and len(container) > 0: shows.append({ 'label': '[COLOR white][B]Uncategorized Episodes[/B][/COLOR]', 'url': url, 'pk': channelid, }) return channels, shows
def get_show_menu(self, channelid): ''' Get shows for specified channel''' url = '{base}{section}{pk}{style}'.format( base=self.base_url, section=self.section_url_template, pk=channelid, style=self.mobile_style) print 'Get show menu: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES) channels = [] shows = [] try: sub = soup.find('ul', attrs={ 'data-role': 'listview', 'data-theme': 'd', 'class': 'forumbits'}) h = sub.findAll('li') linklist = self.get_parents(h) if linklist and len(linklist) > 0: for l in linklist: tagline = HTMLParser.HTMLParser().unescape(l.a.text) link = self.base_url + l.a['href'] fid = self.get_sub_id(link) data = { 'label': tagline, 'url': link, 'pk': fid, } if (l.get('data-has-children')): channels.append(data) else: shows.append(data) except: pass # This forum has a number of uncategorized threads. # Display uncategorized episode threads under Uncategorized container = soup.find('ul', id='threads') if container and len(container) > 0: shows.append({ 'label': '[COLOR white][B]Uncategorized Episodes[/B][/COLOR]', 'url': url, 'pk': channelid, }) return channels, shows
def resolve_redirect(self, url): print 'Resolving redirect: {url}'.format(url=url) data = util.get_remote_data(url) product = SoupStrainer('iframe') soup = BeautifulStoneSoup(data, parseOnlyThese=product, convertEntities=BeautifulSoup.XML_ENTITIES) if soup.iframe: return soup.iframe['src'] return None
def get_xml_data(self, url):
    '''Parse a livestream XML feed into a sorted list of item dicts.

    Each <item> with a title yields {label, url, thumb, regex,
    is_stream}; the regex spec is used later to resolve the stream URL.
    Results are sorted by (is_stream, label).
    '''
    print 'Fetching xml from url: {url}'.format(
        url=url)
    data = util.get_remote_data(url)
    soup = BeautifulStoneSoup(
        data, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
    items = []
    # not expecting channels
    for item in soup('item'):
        # Skip malformed entries without a title element.
        if (item.title):
            name = HTMLParser.HTMLParser().unescape(item.title.string)
            thumbnail = item.thumbnail.string
            url = ''
            # helps with sorting
            is_stream = 0
            # Only entries with a path-like link count as streams.
            if (item.link != None and item.link.string != None):
                url = item.link.string
                is_stream = 1 if '/' in url else 0
            if item.regex:
                livestream_regex = s.LiveStreamRegex(
                    # name is a BeautifulSoup keyword
                    item.regex('name')[0].string,
                    item.regex.expres.string,
                    item.regex.page.string)
                # Referer is optional in the feed; best-effort assignment.
                try:
                    livestream_regex.refer = item.regex.referer.string
                except:
                    pass
                #try:
                #    livestream_regex.agent = item.regex.agent
                #except:
                #    pass
            else:
                livestream_regex = s.LiveStreamRegex()
            items.append({
                'label': name,
                'url': url,
                'thumb': thumbnail,
                'regex': livestream_regex,
                'is_stream': is_stream,
            })
    return sorted(items, key=operator.itemgetter('is_stream', 'label'))
def get_movie(self, url): print 'Get movie: {url}'.format(url=url) data = util.get_remote_data(url) product = SoupStrainer('iframe') soup = BeautifulStoneSoup(data, parseOnlyThese=product, convertEntities=BeautifulSoup.XML_ENTITIES) if soup.iframe: return soup.iframe['src'] return None
def get_show_menu(self, channelid):
    ''' Get shows for specified channel

    Returns (channels, shows). Entries with children become channels,
    leaves become shows; entries with new episodes are sorted to the
    top and tagged with a **NEW** marker.
    '''
    url = '{base}{section}{pk}{style}'.format(
        base=self.base_url,
        section=self.section_url_template,
        pk=channelid,
        style=self.mobile_style)
    print 'Get show menu: {url}'.format(url=url)
    data = util.get_remote_data(url)
    soup = BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES)
    sub = soup.find('ul', attrs={
        'data-role': 'listview',
        'data-theme': 'd',
        'class': 'forumbits'})
    h = sub.findAll('li')
    linklist = self.get_parents(h)
    channels = []
    shows = []
    if linklist and len(linklist) > 0:
        # New items on top
        linklist = sorted(linklist,
                          key=lambda l: self.has_new_episodes(l),
                          reverse=True)
        for l in linklist:
            tagline = HTMLParser.HTMLParser().unescape(
                l.a.text.encode('utf-8', 'ignore'))
            link = self.base_url + l.a['href'].encode('utf-8', 'ignore')
            fid = self.get_sub_id(link)
            # identify new items
            if (self.has_new_episodes(l)):
                tagline = tagline + ' [B]**NEW**[/B]'
            data = {
                'label': tagline,
                'url': link,
                'pk': fid,
            }
            # Containers with children are channels; leaves are shows.
            if (l.get('data-has-children')):
                channels.append(data)
            else:
                shows.append(data)
    return channels, shows
def get_parsed_url(self, url, regex): print 'Fetching parsed url: {url} -- {regex}'.format(url=url, regex=regex) doregex = re.compile('\$doregex\[([^\]]*)\]').findall(url) for name in doregex: if name == regex.label: content = util.get_remote_data(regex.page, True, regex.refer) r = re.compile(regex.expres).search(content) url = url.replace('$doregex[{name}]'.format(name=name), r.group(1).strip()) break return url
def get_parsed_url(self, url, regex): print 'Fetching parsed url: {url} -- {regex}'.format( url=url, regex=regex) doregex = re.compile('\$doregex\[([^\]]*)\]').findall(url) for name in doregex: if name == regex.label: content = util.get_remote_data(regex.page, True, regex.refer) r = re.compile(regex.expres).search(content) url = url.replace( '$doregex[{name}]'.format(name=name), r.group(1).strip()) break return url
def get_xml_data(self, url): print 'Fetching xml from url: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulStoneSoup( data, convertEntities=BeautifulStoneSoup.XML_ENTITIES) items = [] # not expecting channels for item in soup('item'): name = HTMLParser.HTMLParser().unescape(item.title.string) url = item.link.string thumbnail = item.thumbnail.string # helps with sorting is_stream = 1 if '/' in url else 0 if item.regex: livestream_regex = s.LiveStreamRegex( # name is a BeautifulSoup keyword item.regex('name')[0].string, item.regex.expres.string, item.regex.page.string) try: livestream_regex.refer = item.regex.referer.string except: pass #try: # livestream_regex.agent = item.regex.agent #except: # pass else: livestream_regex = s.LiveStreamRegex() items.append({ 'label': name, 'url': url, 'thumb': thumbnail, 'regex': livestream_regex, 'is_stream': is_stream, }) return sorted(items, key=operator.itemgetter('is_stream', 'label'))
def get_show_menu(self, channelid): ''' Get shows for specified channel''' url = '{base}{section}{pk}{style}'.format( base=self.base_url, section=self.section_url_template, pk=channelid, style=self.mobile_style) print 'Get show menu: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES) sub = soup.find('ul', attrs={ 'data-role': 'listview', 'data-theme': 'd', 'class': 'forumbits' }) h = sub.findAll('li') linklist = self.get_parents(h) channels = [] shows = [] if linklist and len(linklist) > 0: for l in linklist: tagline = HTMLParser.HTMLParser().unescape(l.a.text) link = self.base_url + l.a['href'] fid = self.get_sub_id(link) data = { 'label': tagline.encode('utf-8', 'ignore'), 'url': link, 'pk': fid, } if (l.get('data-has-children')): channels.append(data) else: shows.append(data) return channels, shows
def get_episode_data(self, url):
    '''Parse an episode thread's post links into ordered media parts.

    Fetches the mobile-styled thread page, cleans the links in the
    first post, feeds them to a Post parser while showing an XBMC
    progress dialog, and returns [{label, partnum, media}] sorted by
    part number.
    '''
    url = '{url}{style}'.format(
        url=url, style=self.mobile_style)
    print 'Get episode data: {url}'.format(url=url)
    data = util.get_remote_data(url)
    soup = BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES)
    # Links live inside the first post's blockquote.
    linklist = soup.find('ol', id='posts').find(
        'blockquote', 'postcontent restore').findAll('a')
    # correct links for erroneous formatting
    cleanlinks = util.clean_post_links(linklist)
    # parse post links
    p = Post(self.match_string)
    progress = xbmcgui.DialogProgress()
    progress.create('[B]Processing found links[/B]')
    total = len(cleanlinks)
    current = 0
    for url, text in cleanlinks.items():
        current += 1
        percent = (current * 100) // total
        msg = 'Processing {current} of {total}'.format(
            current=current, total=total)
        progress.update(percent, '', msg, '')
        # Let the user abort a long link scan.
        if progress.iscanceled():
            break
        # process here
        p.add_link(url, text)
    progress.close()
    items = [{
        'label': HTMLParser.HTMLParser().unescape(part.text),
        'partnum': num,
        'media': part.media
    } for num, part in sorted(p.parts.items())]
    return items
def get_episode_data(self, url):
    '''Collect media parts from an episode thread's first post.

    Shows a cancellable XBMC progress dialog while the cleaned post
    links are parsed; returns [{label, partnum, media}] in part order.
    '''
    url = '{url}{style}'.format(url=url, style=self.mobile_style)
    print 'Get episode data: {url}'.format(url=url)
    data = util.get_remote_data(url)
    soup = BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES)
    # Links live inside the first post's blockquote.
    linklist = soup.find('ol', id='posts').find(
        'blockquote', 'postcontent restore').findAll('a')
    # correct links for erroneous formatting
    cleanlinks = util.clean_post_links(linklist)
    # parse post links
    p = Post(self.match_string)
    progress = xbmcgui.DialogProgress()
    progress.create('[B]Processing found links[/B]')
    total = len(cleanlinks)
    current = 0
    for url, text in cleanlinks.items():
        current += 1
        percent = (current * 100) // total
        msg = 'Processing {current} of {total}'.format(current=current,
                                                       total=total)
        progress.update(percent, '', msg, '')
        # Let the user abort a long link scan.
        if progress.iscanceled():
            break
        # process here
        p.add_link(url, text)
    progress.close()
    items = [{
        'label': HTMLParser.HTMLParser().unescape(part.text),
        'partnum': num,
        'media': part.media
    } for num, part in sorted(p.parts.items())]
    return items
def get_episode_menu(self, base_url, url): ''' Get list of entries for selected episode''' print 'Get episode menu: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulSoup(data, convertEntities=BeautifulSoup.ALL_ENTITIES) items = [] vidlist = soup.find('div', attrs={'class': 'thumbnails scroll-pane'}) for item in vidlist.ul.findAll('li'): pk = item.a['href'] txt = ''.join(item.a.findAll(text=True)).strip() lnk = item.a['rel'] r = re.compile('(.+?)\?').findall(lnk) if r: lnk = r[0] tb = item.a.span.img['src'] thumb = '{base}/{img}'.format(base=base_url, img=tb) desc = '' icontainer = soup.find('ul', {'class': 'songInfo'}) if icontainer: info = icontainer.find('li', {'class': pk}) if info.p: desc = info.text.encode('utf-8', 'ignore') items.append({ 'label': txt, 'url': lnk, 'thumb': thumb, 'pk': pk, 'plot': desc, }) return items
def resolve_redirect(self, url): print 'Resolving redirect: {url}'.format(url=url) data = util.get_remote_data(url) soup = BeautifulSoup(data, 'html.parser') link = None iframe = soup.find('iframe') if iframe: link = iframe.get('data-lazy-src', None) or \ iframe.get('src', None) else: direct = soup.find('a', src=re.compile(r'embed')) or \ soup.find('a', {'class': 'aio-orange-medium'}) or \ soup.find('a', {'class': 'main-button dlbutton'}) or \ soup.find('a', rel='nofollow') if direct: link = direct.get('href', None) print 'Resolving link: {link}'.format(link=link) return link
def get_episode_menu(self, url, page=1):
    ''' Get episodes for specified show

    Returns (items, next_url): items are the thread links on this page;
    next_url points at the following page built from the vBulletin page
    navigator, or None when already on the last page.
    '''
    url = '{url}{style}'.format(
        url=url, style=self.mobile_style)
    data = util.get_remote_data(url)
    soup = BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES)
    items = []
    next_url = None
    container = soup.find('ul', id='threads')
    if container and len(container) > 0:
        linklist = container.findAll('h3')
        for l in linklist:
            tagline = HTMLParser.HTMLParser().unescape(
                l.a.text.encode('utf-8', 'ignore'))
            link = l.a['href'].encode('utf-8', 'ignore')
            tid = self.get_sub_id(link)
            items.append({
                'label': tagline,
                'url': self.base_url + link,
                'pk': tid,
            })
    # Build the next-page URL from the vBulletin page navigator.
    navlink = soup.find('div', attrs={'data-role': 'vbpagenav'})
    if navlink:
        total_pages = int(navlink['data-totalpages'])
        if (total_pages and total_pages > page):
            # Drop any existing &page= suffix before appending the next one.
            pg = url.find('&page=')
            url = url[:pg] if pg > 0 else url
            next_url = url + '&page=' + str(page + 1)
    return items, next_url
def browse_frame(self, frameid, url):
    '''Return (sorted section link items, containstype) for the frame.'''
    #print '{name} - fetching {url}'.format(name=self.short_name, url=url)
    data = util.get_remote_data(url)
    soup = BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES)
    containstype = self.frames[int(frameid)]['containstype']
    unescaper = HTMLParser.HTMLParser()
    items = []
    for link_tag in soup.findAll('a'):
        fid = self.get_sub_id(link_tag['href'])
        # Anchors without a section id are navigation chrome; skip them.
        if fid:
            items.append({
                'label': unescaper.unescape(link_tag.text),
                'url': self.base_url + self.section_url_template + fid,
                'pk': fid
            })
    return sorted(items, key=lambda entry: entry['label']), containstype
def get_menu_movies(self, url): ''' Get movie titles for category ''' print 'Get list movies: {url}'.format(url=url) data = util.get_remote_data(url) # Get list of movie titles soup = BeautifulSoup(data, 'html.parser', parse_only=self.SoupStrainer_Movies) items = [] pk_regex = re.compile('\/([\w\-]+)\/') for item in soup: img = item.a.img thumb = util.encode(img['src']) if img else '' link = util.encode(item.a['href']) txt = item.text.strip() or item.a.get('title', None) info = util.encode(txt.strip()) label = info pk = pk_regex.search(item.a['href']).group(1) items.append({ 'label': label, 'url': link, 'thumb': thumb, 'info': info, 'pk': pk, 'is_playable': False }) return items
def get_menu_movies(self, url): ''' Get movie titles for category ''' print 'Get list movies: {url}'.format(url=url) data = util.get_remote_data(url, False) # Get list of movie titles soup = BeautifulSoup(data, 'html.parser', parse_only=self.SoupStrainer_Movies ) items = [] pk_regex = re.compile('\/([\w\-]+)\/') for item in soup: img = item.a.img thumb = util.encode(img['src']) if img else '' link = util.encode(item.a['href']) txt = item.text.strip() or item.a.get('title', None) info = util.encode(txt.strip()) label = info pk = pk_regex.search(item.a['href']).group(1) items.append({ 'label': label, 'url': link, 'thumb': thumb, 'info': info, 'pk': pk, 'is_playable': False }) return items
def get_episode_menu(self, url, page=1):
    ''' Get episodes for specified show

    Returns (items, next_url); next_url is taken from the thread page
    navigator's rel="next" anchor, or None on the last page.
    '''
    url = '{url}{style}'.format(
        url=url, style=self.mobile_style)
    data = util.get_remote_data(url)
    soup = BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES)
    items = []
    next_url = None
    container = soup.find('ol', id='threads')
    if container and len(container) > 0:
        linklist = container.findAll('h3')
        for l in linklist:
            tagline = HTMLParser.HTMLParser().unescape(
                l.a.text.encode('utf-8', 'ignore'))
            link = l.a['href'].encode('utf-8', 'ignore')
            tid = self.get_sub_id(link)
            items.append({
                'label': tagline,
                'url': self.base_url + link,
                'pk': tid,
            })
    # Follow the explicit rel="next" pagination anchor when present.
    navlink = soup.find('div', attrs={'class': 'threadpagenav'})
    if navlink:
        anchor = navlink.find('a', attrs={'rel': 'next'})
        if anchor:
            next_url = self.base_url + anchor['href']
    return items, next_url
def get_episode_menu(self, url, page=1):
    ''' Get episodes for specified show

    Returns (items, next_url) where next_url is computed from the
    vBulletin page navigator, or None when already on the last page.
    '''
    url = '{url}{style}'.format(url=url, style=self.mobile_style)
    data = util.get_remote_data(url)
    soup = BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES)
    items = []
    next_url = None
    container = soup.find('ul', id='threads')
    if container and len(container) > 0:
        linklist = container.findAll('h3')
        for l in linklist:
            tagline = HTMLParser.HTMLParser().unescape(l.a.text)
            link = l.a['href']
            tid = self.get_sub_id(link)
            items.append({
                'label': tagline.encode('utf-8', 'ignore'),
                'url': self.base_url + link,
                'pk': tid,
            })
    # Build the next-page URL from the vBulletin page navigator.
    navlink = soup.find('div', attrs={'data-role': 'vbpagenav'})
    if navlink:
        total_pages = int(navlink['data-totalpages'])
        if (total_pages and total_pages > page):
            # Strip any existing &page= suffix before adding the next one.
            pg = url.find('&page=')
            url = url[:pg] if pg > 0 else url
            next_url = url + '&page=' + str(page + 1)
    return items, next_url