def getTVshows(url, order=None):
    """List TV shows scraped from the site's "tab1" container as directory items.

    url   -- page to fetch and scrape
    order -- optional prefix filter; when given, only anchors whose text starts
             with that prefix are listed (their parent nodes are used)
    """
    progress = xbmcgui.DialogProgress()
    progress.create('Progress', 'Please wait...')
    progress.update(1, "", "Loading list - 1%", "")

    div = BeautifulSoup(http_req(url)).find("div", {"id": "tab1"})
    if not order:
        tvs = div.findAll("a")
    else:
        # Parent nodes are kept so .text / ['href'] below mirror the unfiltered branch.
        tvs = [s.parent for s in div.findAll("a", text=re.compile(r"^" + order + ".+?$"))]

    total = len(tvs)
    # enumerate() replaces the manual while/index bookkeeping of the original.
    for current, tv in enumerate(tvs):
        title = htmlFilter(tv.text)
        link = urlFilter(tv['href'])
        addDir(title, link, 2)
        if progress.iscanceled():
            # BUGFIX: close the dialog before exiting so it is not leaked on cancel.
            progress.close()
            sys.exit()
        percent = int(((current + 1) * 100) / total)
        progress.update(percent, "", "Loading list - " + str(percent) + "%", "")

    progress.close()
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def getTVshows(url, order=None):
    """Build a Kodi directory of TV shows from the "tab1" block of *url*.

    When *order* is supplied only anchors whose text begins with that prefix
    are used (via their parent nodes); otherwise every anchor is listed.
    """
    dialog = xbmcgui.DialogProgress()
    dialog.create('Progress', 'Please wait...')
    dialog.update(1, "", "Loading list - 1%", "")

    container = BeautifulSoup(http_req(url)).find("div", {"id": "tab1"})
    if order:
        pattern = re.compile(r"^" + order + ".+?$")
        entries = [anchor.parent for anchor in container.findAll("a", text=pattern)]
    else:
        entries = container.findAll("a")

    count = len(entries)
    idx = 0
    while idx < count:
        entry = entries[idx]
        addDir(htmlFilter(entry.text), urlFilter(entry['href']), 2)
        if dialog.iscanceled():
            sys.exit()
        pct = int(((idx + 1) * 100) / count)
        dialog.update(pct, "", "Loading list - " + str(pct) + "%", "")
        idx += 1

    dialog.close()
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def buildCategories():
    """Build the PostTV category list and emit it as a Kodi directory.

    Scrapes the PostTV home page navigation ("livenav" items) for category
    names and links; the synthetic 'All' category is always listed first.
    """
    categories = ['All']
    homeurl = 'http://www.washingtonpost.com/posttv'
    homehtml = getUrl(homeurl)
    soup = BeautifulSoup(homehtml)
    results = []
    for div in soup.findAll("li", {"class": re.compile("livenav")}):
        # (debug `print div` removed)
        for links in div.findAll('a'):
            # .get() replaces the deprecated/removed Tag.has_key()
            if links.get('href') is not None:
                link = links.get('href')
        for titles in div.findAll("span"):
            title = titles.string
            title = title.replace(" ", "")
            results.append((title, link))
            categories.append(title)
    for i in range(0, len(categories)):
        if categories[i] == 'All':
            catName, catSlug = 'All', 'videos'
        else:
            catName, catSlug = categories[i], categories[i].replace(' ', '-').lower()
        callback = 'plugin://plugin.video.posttv?category=' + catSlug
        li = xbmcgui.ListItem(catName)
        xbmcplugin.addDirectoryItem(handle=addon_handle, url=callback,
                                    listitem=li, isFolder=True)
    xbmcplugin.endOfDirectory(addon_handle)
def getMovieLens(url):
    """Fetch a MovieLens listing page and search each movie title via *getter*.

    Shows a progress dialog while searching. Returns the list of matching
    directory XML nodes, or None if the user cancels mid-search.
    """
    req = urllib2.Request(url)
    req.add_header('User-Agent', "%s %s" % (sys.modules["__main__"].__plugin__,
                                            sys.modules["__main__"].__version__))
    response = urllib2.urlopen(req)
    soup = BeautifulSoup(response)
    mySpans = soup.findAll('span', attrs={"class": "movieTitle"})
    mydirs = list()
    pDialog = xbmcgui.DialogProgress()
    pDialog.create('XBMC', 'Initializing script...')
    i = 0.0
    total = len(mySpans)
    for span in mySpans:
        i += 1
        percent = int((i * 100) / total)
        pDialog.update(percent, 'Searching ' + span.a.string,
                       str(int(i)) + '/' + str(total))
        s = span.a.string
        # Titles look like "Name, The (1999)": peel off the trailing "(yyyy)"
        # and normalize the name for searching.
        year = s[len(s) - 7:len(s)]
        year = year.replace('(', '').replace(')', '')
        year = year.strip()
        s = s.split('(', 1)[0].strip()
        s = s.replace(', The', '')
        xbmc.log('s=%s' % s)
        dirs = getter.searchDirs(s + ' ' + year)
        if dirs:
            for d in dirs:
                # 'did' avoids shadowing the builtin id(); the dead local
                # 'url' (which shadowed the parameter) and the unused
                # date/thumb extractions were removed.
                did = d.getElementsByTagName('id').item(0).firstChild.data
                name = d.getElementsByTagName('name').item(0).firstChild.data
                clean = CleanFileName(name, False)
                # Strip bracket characters before the fuzzy title comparison.
                for ch in ('(', '[', ']', ')'):
                    clean = clean.replace(ch, '')
                if s.lower() in clean.lower() and year in name:
                    mydirs.append(d)
        if pDialog.iscanceled():
            print('Canceled search')
            pDialog.close()
            return
    pDialog.close()
    return mydirs
def get_lcs_standings(teamName):
    """Load the latest LCS standings for *teamName* from the Gamepedia server.

    Returns {'standing': <1-based rank>, 'record': 'xW-yL'} on success, or
    None when the team, request, or table row cannot be resolved.
    """
    url = None
    wanted = None
    if teamName in TEAMS_EU:
        url = PluginUtils.unescape(PluginUtils.get_string(30104))
        wanted = TEAMS_EU[teamName].lower()
    # NA deliberately checked second so it overrides EU, as in the original.
    if teamName in TEAMS_NA:
        url = PluginUtils.unescape(PluginUtils.get_string(30103))
        wanted = TEAMS_NA[teamName].lower()
    if url is None:
        return None
    response = PluginUtils.do_request(url)
    if response is None:
        return None
    soup = BeautifulSoup(response)
    # findAll returns a (possibly empty) list, never None -- iterate directly.
    for table in soup.findAll('table'):
        body = table.find('tbody')
        if body is None:
            # BUGFIX: a table without <tbody> previously raised AttributeError.
            continue
        for idx, row in enumerate(body.findAll('tr')):
            columns = row.findAll('td')
            # BUGFIX: guard against short rows before indexing columns 2..4.
            if len(columns) < 5:
                continue
            if columns[2].find('a').text.lower() == wanted:
                return {
                    'standing': idx + 1,
                    'record': columns[3].find('span').text + "W-" +
                              columns[4].find('span').text + "L"
                }
    return None
def recentlyAdded(cat):
    """List the 20 most recently added items for the given category.

    cat -- 'tvshows' (first "tab1" pane) or 'movies' (second pane); any other
           value now just closes the directory instead of raising NameError.
    """
    html = http_req(siteUrl)
    panes = BeautifulSoup(html).findAll('div', {'id': 'tab1'})
    if cat == 'tvshows':
        pane, param = panes[0], 'seriale'
    elif cat == 'movies':
        pane, param = panes[1], 'filme'
    else:
        # BUGFIX: unknown category previously left pane/param unbound.
        xbmcplugin.endOfDirectory(int(sys.argv[1]))
        return
    results = pane.findAll('a', href=re.compile(param), limit=20)
    total = len(results)
    for a in results:
        # Sibling div carries either "SxxEyy" (shows) or the release year (movies).
        ep_year = a.parent.parent.findAll('div')[1].text.strip()
        title = htmlFilter(a.text)
        url = urlFilter(a['href'])
        if cat == 'tvshows':
            eps = re.search(r'S(\d+)E(\d+-?\d*)', ep_year)
            season = eps.group(1) if eps else ''
            episode = eps.group(2) if eps else ''
            name = '%s %sx%s' % (title, season, episode)
            addDir(name, url, 8, "", title, season, episode,
                   folder=False, totalItems=total)
        else:
            year = re.search(r'(\d{4})', ep_year)
            year = year.group(1) if year else 'unknown'
            name = '%s (%s)' % (title, year)
            addDir(name, url, 8, "", name, folder=False, totalItems=total)
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def scrapelinks(html):
    """Scrape PostTV search-result slides into (title, link, image) tuples.

    Only anchors pointing at '/posttv/c/video/' pages are considered; the
    lazy-loaded background image and the slide caption supply image/title.
    """
    soup = BeautifulSoup(html)
    results = []
    for div in soup.findAll("div", {"class": "search-results-slide-item-cont"}):
        for links in div.findAll('a'):
            # .get() replaces the deprecated/removed Tag.has_key();
            # guard clauses flatten the original nesting.
            if links.get('href') is None:
                continue
            if '/posttv/c/video/' not in links['href']:
                continue
            link = links.get('href')
            for images in links.findAll('div', {"class": "search-results-slide-image image-lazy-background"}):
                image = images.get('data-image-src')
            for titles in div.findAll("div", {"class": "search-results-slide-caption"}):
                title = titles.string
                title = title.replace(" ", "")
                results.append((title, link, image))
    return results
def streamGetter(url):
    """Resolve a PostTV video page into (title, description, image, playpath)."""
    # Fetch the raw page source for the video URL.
    page_source = urllib2.urlopen(url).read()

    # OpenGraph metadata supplies title, description and thumbnail.
    soup = BeautifulSoup(page_source)
    for tag in soup.findAll("meta", attrs={"property": "og:title"}):
        title = tag.get("content")
    for tag in soup.findAll("meta", attrs={"property": "og:description"}):
        description = tag.get("content")
    for tag in soup.findAll("meta", attrs={"property": "og:image"}):
        image = tag.get("content")

    # Locate the Ooyala player string, skipping ad-set variants, and
    # strip the surrounding quotes.
    match = re.search('"((http)?://player.ooyala.com/player_v2.swf(?!.*adSetCode).*?)"', page_source)
    cleanURL = match.group().replace("\"", "")

    # The embed code sits between 'embedCode=' and '&autoplay'.
    embedCode = re.search('((?<=embedCode=)(.*)(?=&autoplay))', cleanURL).group()

    # Decrypt the embed code into an actual stream path.
    smil = CommonUtils().grabEncrypted(embedCode)
    decrypted_smil = ooyalaCrypto().ooyalaDecrypt(smil)
    videoArray = ''.join(MagicNaming().getVideoUrl(decrypted_smil))

    # Derive the RTMP playpath from the last two path segments.
    segments = videoArray.rsplit('/', 2)
    playpath = 'mp4:s/' + segments[1] + '/' + segments[2]

    return (title, description, image, playpath)
def recentlyAdded(cat):
    """Directory of the 20 newest entries for *cat* ('tvshows' or 'movies')."""
    page = http_req(siteUrl)
    tabs = BeautifulSoup(page).findAll('div', {'id': 'tab1'})
    if cat == 'tvshows':
        tab = tabs[0]
        needle = 'seriale'
    elif cat == 'movies':
        tab = tabs[1]
        needle = 'filme'
    anchors = tab.findAll('a', href=re.compile(needle), limit=20)
    count = len(anchors)
    for anchor in anchors:
        # The grandparent's second div holds "SxxEyy" (shows) or a year (movies).
        info = anchor.parent.parent.findAll('div')[1].text.strip()
        show = htmlFilter(anchor.text)
        link = urlFilter(anchor['href'])
        if cat == 'tvshows':
            hit = re.search(r'S(\d+)E(\d+-?\d*)', info)
            se = hit.group(1) if hit else ''
            ep = hit.group(2) if hit else ''
            label = '%s %sx%s' % (show, se, ep)
            addDir(label, link, 8, "", show, se, ep,
                   folder=False, totalItems=count)
        elif cat == 'movies':
            hit = re.search('(\d{4,4})', info)
            yr = hit.group(1) if hit else 'unknown'
            label = '%s (%s)' % (show, yr)
            addDir(label, link, 8, "", label, folder=False, totalItems=count)
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def get_lcs_standings(teamName):
    """Fetch the current LCS standings row for *teamName* from Gamepedia.

    Returns a dict with 'standing' (1-based rank) and 'record' ('xW-yL'),
    or None when the team or its row cannot be found.
    """
    url = ''
    # EU first, then NA -- an NA hit overrides the EU URL.
    if teamName in TEAMS_EU:
        url = PluginUtils.unescape(PluginUtils.get_string(30104))
    if teamName in TEAMS_NA:
        url = PluginUtils.unescape(PluginUtils.get_string(30103))
    if url == '':
        return None
    response = PluginUtils.do_request(url)
    if response is None:
        return None
    soup = BeautifulSoup(response)
    tables = soup.findAll('table')
    if tables is not None:
        for table in tables:
            rows = table.find('tbody').findAll('tr')
            if rows is None:
                continue
            for position, row in enumerate(rows):
                cells = row.findAll('td')
                if cells is None or cells[2] is None:
                    continue
                matched = False
                if teamName in TEAMS_EU:
                    matched = cells[2].find('a').text.lower() == TEAMS_EU[teamName].lower()
                if not matched and teamName in TEAMS_NA:
                    matched = cells[2].find('a').text.lower() == TEAMS_NA[teamName].lower()
                if matched:
                    record = (cells[3].find('span').text + "W-" +
                              cells[4].find('span').text + "L")
                    return {'standing': position + 1, 'record': record}
    return None
def load_event_content(eventId):
    """Load and parse the reddit self-text describing one LoL event.

    Returns a list of LoLEventDay namedtuples (one per results table found
    in the event markup), or None when the request fails.
    """
    LoLEventDay = namedtuple('LoLEventDay', 'dayId day matches recommended imageUrl')
    LoLEventMatch = namedtuple('LoLEventMatch', 'gameId team1 team2 videoLinks')
    url = LOLMATCHESURL % eventId
    response = PluginUtils.do_request(url)
    if response is None:
        return None
    # The event markup lives as escaped HTML inside the reddit JSON payload.
    decoded_data = json.load(response)
    selfText = decoded_data[0]['data']['children'][0]['data']['selftext_html']
    eventTitle = ''
    days = []
    soup = BeautifulSoup(PluginUtils.unescape(selfText))
    # Recommended matches are encoded as "/spoiler" links (e.g. "H1_C1_C4");
    # fold them into one underscore-separated string.
    recommended = ''
    spoilers = soup.findAll("a", href="/spoiler")
    if spoilers is not None:
        for spoiler in spoilers:
            games = spoiler.text.replace(',', '_')
            recommended += games + "_"
    imgUrl = ''
    link = soup.find('a', href='#EVENT_PICTURE')
    if link is not None:
        # NOTE(review): Tag.title looks up a <title> *child tag*, not the
        # "title" attribute; kept as-is -- confirm whether link['title']
        # was intended (cf. titleLink['title'] below).
        imgUrl = link.title
    tables = soup.findAll("table")
    for idx, table in enumerate(tables):
        if table is None:
            continue
        # A sentinel link carries the day/event title in its title attribute.
        titleLink = table.find("a", href="http://www.table_title.com")
        if titleLink is not None:
            eventTitle = titleLink['title']
        YouTubeColumns = []
        Team1Index = -1
        Team2Index = -1
        # Header row tells us which columns hold youtube links and team names.
        rows = table.find("thead").findAll("tr")
        for row in rows:
            cols = row.findAll("th")
            for i, col in enumerate(cols):
                if col.text.lower() == "youtube":
                    YouTubeColumns.append(i)
                if col.text.lower() == "team 1":
                    Team1Index = i
                if col.text.lower() == "team 2":
                    Team2Index = i
        # BUGFIX: the initialization existed only as a commented-out
        # "# matches=[]", so the first matches.append() raised NameError;
        # initialize the per-table match list here.
        matches = []
        rows = table.find("tbody").findAll("tr")
        for row in rows:
            videos = []
            cols = row.findAll("td")
            if cols is not None:
                for yv in YouTubeColumns:
                    if cols[yv] is not None:
                        if cols[yv].a is not None:
                            youTubeData = PluginUtils.parse_youtube_url(cols[yv].a['href'])
                            videos.append({'text': cols[yv].a.text,
                                           'videoId': youTubeData['videoId'],
                                           'time': youTubeData['time']})
                matches.append(LoLEventMatch(cols[0].text, cols[Team1Index].text,
                                             cols[Team2Index].text, videos))
        days.append(LoLEventDay(dayId=idx, day=eventTitle, matches=matches,
                                recommended=recommended, imageUrl=imgUrl))
    return days
def find_games(self, url):
    """Fetch *url* and return every 'sbGame' div from the scoreboard page."""
    markup = urllib2.urlopen(url)
    parsed = BeautifulSoup(markup)
    return parsed.findAll('div', {'class': 'sbGame'})