def getTVshows(url, order=None):
    """List TV shows scraped from the site's "tab1" container as directory items.

    url   -- page to fetch and scrape
    order -- optional prefix filter; when given, only anchors whose text starts
             with that prefix are listed (their parent nodes are used)
    """
    progress = xbmcgui.DialogProgress()
    progress.create('Progress', 'Please wait...')
    progress.update(1, "", "Loading list - 1%", "")

    div = BeautifulSoup(http_req(url)).find("div", {"id": "tab1"})
    if not order:
        tvs = div.findAll("a")
    else:
        # Parent nodes are kept so .text / ['href'] below mirror the unfiltered branch.
        tvs = [s.parent for s in div.findAll("a", text=re.compile(r"^" + order + ".+?$"))]

    total = len(tvs)
    # enumerate() replaces the manual while/index bookkeeping of the original.
    for current, tv in enumerate(tvs):
        title = htmlFilter(tv.text)
        link = urlFilter(tv['href'])
        addDir(title, link, 2)
        if progress.iscanceled():
            # BUGFIX: close the dialog before exiting so it is not leaked on cancel.
            progress.close()
            sys.exit()
        percent = int(((current + 1) * 100) / total)
        progress.update(percent, "", "Loading list - " + str(percent) + "%", "")

    progress.close()
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def getTVshows(url, order=None):
    """Build a Kodi directory of TV shows from the "tab1" block of *url*.

    When *order* is supplied only anchors whose text begins with that prefix
    are used (via their parent nodes); otherwise every anchor is listed.
    """
    dialog = xbmcgui.DialogProgress()
    dialog.create('Progress', 'Please wait...')
    dialog.update(1, "", "Loading list - 1%", "")

    container = BeautifulSoup(http_req(url)).find("div", {"id": "tab1"})
    if order:
        pattern = re.compile(r"^" + order + ".+?$")
        entries = [anchor.parent for anchor in container.findAll("a", text=pattern)]
    else:
        entries = container.findAll("a")

    count = len(entries)
    idx = 0
    while idx < count:
        entry = entries[idx]
        addDir(htmlFilter(entry.text), urlFilter(entry['href']), 2)
        if dialog.iscanceled():
            sys.exit()
        pct = int(((idx + 1) * 100) / count)
        dialog.update(pct, "", "Loading list - " + str(pct) + "%", "")
        idx += 1

    dialog.close()
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def buildCategories():
    """Build the PostTV category list and emit it as a Kodi directory.

    Scrapes the PostTV home page navigation ("livenav" items) for category
    names and links; the synthetic 'All' category is always listed first.
    """
    categories = ['All']
    homeurl = 'http://www.washingtonpost.com/posttv'
    homehtml = getUrl(homeurl)
    soup = BeautifulSoup(homehtml)
    results = []
    for div in soup.findAll("li", {"class": re.compile("livenav")}):
        # (debug `print div` removed)
        for links in div.findAll('a'):
            # .get() replaces the deprecated/removed Tag.has_key()
            if links.get('href') is not None:
                link = links.get('href')
        for titles in div.findAll("span"):
            title = titles.string
            title = title.replace(" ", "")
            results.append((title, link))
            categories.append(title)
    for i in range(0, len(categories)):
        if categories[i] == 'All':
            catName, catSlug = 'All', 'videos'
        else:
            catName, catSlug = categories[i], categories[i].replace(' ', '-').lower()
        callback = 'plugin://plugin.video.posttv?category=' + catSlug
        li = xbmcgui.ListItem(catName)
        xbmcplugin.addDirectoryItem(handle=addon_handle, url=callback,
                                    listitem=li, isFolder=True)
    xbmcplugin.endOfDirectory(addon_handle)
def getMovieLens(url):
    """Fetch a MovieLens listing page and search each movie title via *getter*.

    Shows a progress dialog while searching. Returns the list of matching
    directory XML nodes, or None if the user cancels mid-search.
    """
    req = urllib2.Request(url)
    req.add_header('User-Agent', "%s %s" % (sys.modules["__main__"].__plugin__,
                                            sys.modules["__main__"].__version__))
    response = urllib2.urlopen(req)
    soup = BeautifulSoup(response)
    mySpans = soup.findAll('span', attrs={"class": "movieTitle"})
    mydirs = list()
    pDialog = xbmcgui.DialogProgress()
    pDialog.create('XBMC', 'Initializing script...')
    i = 0.0
    total = len(mySpans)
    for span in mySpans:
        i += 1
        percent = int((i * 100) / total)
        pDialog.update(percent, 'Searching ' + span.a.string,
                       str(int(i)) + '/' + str(total))
        s = span.a.string
        # Titles look like "Name, The (1999)": peel off the trailing "(yyyy)"
        # and normalize the name for searching.
        year = s[len(s) - 7:len(s)]
        year = year.replace('(', '').replace(')', '')
        year = year.strip()
        s = s.split('(', 1)[0].strip()
        s = s.replace(', The', '')
        xbmc.log('s=%s' % s)
        dirs = getter.searchDirs(s + ' ' + year)
        if dirs:
            for d in dirs:
                # 'did' avoids shadowing the builtin id(); the dead local
                # 'url' (which shadowed the parameter) and the unused
                # date/thumb extractions were removed.
                did = d.getElementsByTagName('id').item(0).firstChild.data
                name = d.getElementsByTagName('name').item(0).firstChild.data
                clean = CleanFileName(name, False)
                # Strip bracket characters before the fuzzy title comparison.
                for ch in ('(', '[', ']', ')'):
                    clean = clean.replace(ch, '')
                if s.lower() in clean.lower() and year in name:
                    mydirs.append(d)
        if pDialog.iscanceled():
            print('Canceled search')
            pDialog.close()
            return
    pDialog.close()
    return mydirs
def get_lcs_standings(teamName):
    """Load the latest LCS standings for *teamName* from the Gamepedia server.

    Returns {'standing': <1-based rank>, 'record': 'xW-yL'} on success, or
    None when the team, request, or table row cannot be resolved.
    """
    url = None
    wanted = None
    if teamName in TEAMS_EU:
        url = PluginUtils.unescape(PluginUtils.get_string(30104))
        wanted = TEAMS_EU[teamName].lower()
    # NA deliberately checked second so it overrides EU, as in the original.
    if teamName in TEAMS_NA:
        url = PluginUtils.unescape(PluginUtils.get_string(30103))
        wanted = TEAMS_NA[teamName].lower()
    if url is None:
        return None
    response = PluginUtils.do_request(url)
    if response is None:
        return None
    soup = BeautifulSoup(response)
    # findAll returns a (possibly empty) list, never None -- iterate directly.
    for table in soup.findAll('table'):
        body = table.find('tbody')
        if body is None:
            # BUGFIX: a table without <tbody> previously raised AttributeError.
            continue
        for idx, row in enumerate(body.findAll('tr')):
            columns = row.findAll('td')
            # BUGFIX: guard against short rows before indexing columns 2..4.
            if len(columns) < 5:
                continue
            if columns[2].find('a').text.lower() == wanted:
                return {
                    'standing': idx + 1,
                    'record': columns[3].find('span').text + "W-" +
                              columns[4].find('span').text + "L"
                }
    return None
def recentlyAdded(cat):
    """List the 20 most recently added items for the given category.

    cat -- 'tvshows' (first "tab1" pane) or 'movies' (second pane); any other
           value now just closes the directory instead of raising NameError.
    """
    html = http_req(siteUrl)
    panes = BeautifulSoup(html).findAll('div', {'id': 'tab1'})
    if cat == 'tvshows':
        pane, param = panes[0], 'seriale'
    elif cat == 'movies':
        pane, param = panes[1], 'filme'
    else:
        # BUGFIX: unknown category previously left pane/param unbound.
        xbmcplugin.endOfDirectory(int(sys.argv[1]))
        return
    results = pane.findAll('a', href=re.compile(param), limit=20)
    total = len(results)
    for a in results:
        # Sibling div carries either "SxxEyy" (shows) or the release year (movies).
        ep_year = a.parent.parent.findAll('div')[1].text.strip()
        title = htmlFilter(a.text)
        url = urlFilter(a['href'])
        if cat == 'tvshows':
            eps = re.search(r'S(\d+)E(\d+-?\d*)', ep_year)
            season = eps.group(1) if eps else ''
            episode = eps.group(2) if eps else ''
            name = '%s %sx%s' % (title, season, episode)
            addDir(name, url, 8, "", title, season, episode,
                   folder=False, totalItems=total)
        else:
            year = re.search(r'(\d{4})', ep_year)
            year = year.group(1) if year else 'unknown'
            name = '%s (%s)' % (title, year)
            addDir(name, url, 8, "", name, folder=False, totalItems=total)
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def scrapelinks(html):
    """Scrape PostTV search-result slides into (title, link, image) tuples.

    Only anchors pointing at '/posttv/c/video/' pages are considered; the
    lazy-loaded background image and the slide caption supply image/title.
    """
    soup = BeautifulSoup(html)
    results = []
    for div in soup.findAll("div", {"class": "search-results-slide-item-cont"}):
        for links in div.findAll('a'):
            # .get() replaces the deprecated/removed Tag.has_key();
            # guard clauses flatten the original nesting.
            if links.get('href') is None:
                continue
            if '/posttv/c/video/' not in links['href']:
                continue
            link = links.get('href')
            for images in links.findAll('div', {"class": "search-results-slide-image image-lazy-background"}):
                image = images.get('data-image-src')
            for titles in div.findAll("div", {"class": "search-results-slide-caption"}):
                title = titles.string
                title = title.replace(" ", "")
                results.append((title, link, image))
    return results
def streamGetter(url):
    """Resolve a PostTV video page into (title, description, image, playpath)."""
    # Fetch the raw page source for the video URL.
    page_source = urllib2.urlopen(url).read()

    # OpenGraph metadata supplies title, description and thumbnail.
    soup = BeautifulSoup(page_source)
    for tag in soup.findAll("meta", attrs={"property": "og:title"}):
        title = tag.get("content")
    for tag in soup.findAll("meta", attrs={"property": "og:description"}):
        description = tag.get("content")
    for tag in soup.findAll("meta", attrs={"property": "og:image"}):
        image = tag.get("content")

    # Locate the Ooyala player string, skipping ad-set variants, and
    # strip the surrounding quotes.
    match = re.search('"((http)?://player.ooyala.com/player_v2.swf(?!.*adSetCode).*?)"', page_source)
    cleanURL = match.group().replace("\"", "")

    # The embed code sits between 'embedCode=' and '&autoplay'.
    embedCode = re.search('((?<=embedCode=)(.*)(?=&autoplay))', cleanURL).group()

    # Decrypt the embed code into an actual stream path.
    smil = CommonUtils().grabEncrypted(embedCode)
    decrypted_smil = ooyalaCrypto().ooyalaDecrypt(smil)
    videoArray = ''.join(MagicNaming().getVideoUrl(decrypted_smil))

    # Derive the RTMP playpath from the last two path segments.
    segments = videoArray.rsplit('/', 2)
    playpath = 'mp4:s/' + segments[1] + '/' + segments[2]

    return (title, description, image, playpath)
def recentlyAdded(cat):
    """Directory of the 20 newest entries for *cat* ('tvshows' or 'movies')."""
    page = http_req(siteUrl)
    tabs = BeautifulSoup(page).findAll('div', {'id': 'tab1'})
    if cat == 'tvshows':
        tab = tabs[0]
        needle = 'seriale'
    elif cat == 'movies':
        tab = tabs[1]
        needle = 'filme'
    anchors = tab.findAll('a', href=re.compile(needle), limit=20)
    count = len(anchors)
    for anchor in anchors:
        # The grandparent's second div holds "SxxEyy" (shows) or a year (movies).
        info = anchor.parent.parent.findAll('div')[1].text.strip()
        show = htmlFilter(anchor.text)
        link = urlFilter(anchor['href'])
        if cat == 'tvshows':
            hit = re.search(r'S(\d+)E(\d+-?\d*)', info)
            se = hit.group(1) if hit else ''
            ep = hit.group(2) if hit else ''
            label = '%s %sx%s' % (show, se, ep)
            addDir(label, link, 8, "", show, se, ep,
                   folder=False, totalItems=count)
        elif cat == 'movies':
            hit = re.search('(\d{4,4})', info)
            yr = hit.group(1) if hit else 'unknown'
            label = '%s (%s)' % (show, yr)
            addDir(label, link, 8, "", label, folder=False, totalItems=count)
    xbmcplugin.endOfDirectory(int(sys.argv[1]))
def get_lcs_standings(teamName):
    """Fetch the current LCS standings row for *teamName* from Gamepedia.

    Returns a dict with 'standing' (1-based rank) and 'record' ('xW-yL'),
    or None when the team or its row cannot be found.
    """
    url = ''
    # EU first, then NA -- an NA hit overrides the EU URL.
    if teamName in TEAMS_EU:
        url = PluginUtils.unescape(PluginUtils.get_string(30104))
    if teamName in TEAMS_NA:
        url = PluginUtils.unescape(PluginUtils.get_string(30103))
    if url == '':
        return None
    response = PluginUtils.do_request(url)
    if response is None:
        return None
    soup = BeautifulSoup(response)
    tables = soup.findAll('table')
    if tables is not None:
        for table in tables:
            rows = table.find('tbody').findAll('tr')
            if rows is None:
                continue
            for position, row in enumerate(rows):
                cells = row.findAll('td')
                if cells is None or cells[2] is None:
                    continue
                matched = False
                if teamName in TEAMS_EU:
                    matched = cells[2].find('a').text.lower() == TEAMS_EU[teamName].lower()
                if not matched and teamName in TEAMS_NA:
                    matched = cells[2].find('a').text.lower() == TEAMS_NA[teamName].lower()
                if matched:
                    record = (cells[3].find('span').text + "W-" +
                              cells[4].find('span').text + "L")
                    return {'standing': position + 1, 'record': record}
    return None
def load_event_content(eventId):
    """Load and parse the reddit self-text describing one LoL event.

    Returns a list of LoLEventDay namedtuples (one per results table found
    in the event markup), or None when the request fails.
    """
    LoLEventDay = namedtuple('LoLEventDay', 'dayId day matches recommended imageUrl')
    LoLEventMatch = namedtuple('LoLEventMatch', 'gameId team1 team2 videoLinks')
    url = LOLMATCHESURL % eventId
    response = PluginUtils.do_request(url)
    if response is None:
        return None
    # The event markup lives as escaped HTML inside the reddit JSON payload.
    decoded_data = json.load(response)
    selfText = decoded_data[0]['data']['children'][0]['data']['selftext_html']
    eventTitle = ''
    days = []
    soup = BeautifulSoup(PluginUtils.unescape(selfText))
    # Recommended matches are encoded as "/spoiler" links (e.g. "H1_C1_C4");
    # fold them into one underscore-separated string.
    recommended = ''
    spoilers = soup.findAll("a", href="/spoiler")
    if spoilers is not None:
        for spoiler in spoilers:
            games = spoiler.text.replace(',', '_')
            recommended += games + "_"
    imgUrl = ''
    link = soup.find('a', href='#EVENT_PICTURE')
    if link is not None:
        # NOTE(review): Tag.title looks up a <title> *child tag*, not the
        # "title" attribute; kept as-is -- confirm whether link['title']
        # was intended (cf. titleLink['title'] below).
        imgUrl = link.title
    tables = soup.findAll("table")
    for idx, table in enumerate(tables):
        if table is None:
            continue
        # A sentinel link carries the day/event title in its title attribute.
        titleLink = table.find("a", href="http://www.table_title.com")
        if titleLink is not None:
            eventTitle = titleLink['title']
        YouTubeColumns = []
        Team1Index = -1
        Team2Index = -1
        # Header row tells us which columns hold youtube links and team names.
        rows = table.find("thead").findAll("tr")
        for row in rows:
            cols = row.findAll("th")
            for i, col in enumerate(cols):
                if col.text.lower() == "youtube":
                    YouTubeColumns.append(i)
                if col.text.lower() == "team 1":
                    Team1Index = i
                if col.text.lower() == "team 2":
                    Team2Index = i
        # BUGFIX: the initialization existed only as a commented-out
        # "# matches=[]", so the first matches.append() raised NameError;
        # initialize the per-table match list here.
        matches = []
        rows = table.find("tbody").findAll("tr")
        for row in rows:
            videos = []
            cols = row.findAll("td")
            if cols is not None:
                for yv in YouTubeColumns:
                    if cols[yv] is not None:
                        if cols[yv].a is not None:
                            youTubeData = PluginUtils.parse_youtube_url(cols[yv].a['href'])
                            videos.append({'text': cols[yv].a.text,
                                           'videoId': youTubeData['videoId'],
                                           'time': youTubeData['time']})
                matches.append(LoLEventMatch(cols[0].text, cols[Team1Index].text,
                                             cols[Team2Index].text, videos))
        days.append(LoLEventDay(dayId=idx, day=eventTitle, matches=matches,
                                recommended=recommended, imageUrl=imgUrl))
    return days
def find_games(self, url):
    """Fetch *url* and return every 'sbGame' div from the scoreboard page."""
    markup = urllib2.urlopen(url)
    parsed = BeautifulSoup(markup)
    return parsed.findAll('div', {'class': 'sbGame'})