def GetItems(type, genre=None, sort=None, alpha=None, pages=5, start_page=0):
    """
    Scrape up to `pages` listing pages and return the media items found.

    type: site section to list (passed to GetURL and stored on each item).
    genre, sort, alpha: listing filters, passed straight through to GetURL.
    pages: maximum number of pages to fetch.
    start_page: page offset added to the running page number.

    Returns a list of MediaInfo objects.
    """
    page_num = 0
    items = []

    while page_num < pages:
        page_num = page_num + 1

        url = GetURL(type=type, genre=genre, sort=sort, alpha=alpha,
                     page_num=page_num + start_page)
        # Keep the parsed URL around so relative poster paths can be resolved.
        url_parts = urlparse.urlparse(url)

        soup = BeautifulSoup(HTTP.Request(url).content)

        for item in soup.findAll("div", {'class': 'index_item index_item_ie'}):
            res = MediaInfo()
            res.type = type

            # Extract out title ("Watch <title>"), then split off a trailing
            # "(year)" if present.
            res.title = re.search("Watch (.*)", item.find('a')['title']).group(1).strip()
            match = re.search(r"(.*)\((\d*)\)", res.title)
            if match:
                res.title = match.group(1).strip()
                res.year = int(match.group(2).strip())

            # Extract out URL (drop the leading "/").
            res.id = item.a['href'][1:]

            # Extract out thumb. Note that extracted path may be relative.
            poster_url = item.find('img')['src']
            if poster_url.startswith("//"):
                # Protocol-relative URL: prepend the scheme only.
                poster_url = url_parts.scheme + ":" + poster_url
            elif poster_url.startswith("/"):
                # Deal with fully-relative paths. Doesn't deal with partial
                # relative paths.
                poster_url = url_parts.scheme + "://" + url_parts.netloc + poster_url
            res.poster = poster_url

            # Extract out rating from the width of the star bar.
            # BUG FIX: the old code called .group(1) before checking whether
            # the search matched at all, raising AttributeError on styles
            # without a width; also "<>" replaced by "!=".
            rating_style = item.find('li')['style']
            rating_match = re.search(r"width:\s([\d\.]*)px;", rating_style)
            if rating_match is not None and rating_match.group(1) != "":
                res.rating = int(int(rating_match.group(1)) / 10)

            # Add to item list.
            items.append(res)

    return items
def GetItems(type, genre=None, sort=None, alpha=None, pages=5, start_page=0):
    """
    List items from an IceFilms index page.

    type: site section ("movies", "tv", ...) appended to ICEFILMS_URL.
    genre: genre filter; lower-cased before use.
    alpha: alphabetical index ("123" or a letter); forces the "a-z" sort
        and overrides genre with the letter bucket.
    sort: sort-order path component.
    pages, start_page: accepted for interface compatibility with the other
        GetItems providers but unused here (a single page is fetched).

    Returns a list of MediaInfo objects.
    """
    items = []

    url = ICEFILMS_URL + "/" + type
    if genre:
        genre = genre.lower()
    if alpha:
        # Alphabetical browsing: the site uses "1" for the "123" bucket.
        sort = "a-z"
        genre = "1" if alpha == "123" else alpha.upper()

    # NOTE(review): the flattened original makes the nesting ambiguous;
    # sequential ifs chosen (sort appended first, then genre or "1") as the
    # most plausible reading — confirm against the site's URL scheme.
    if sort is not None:
        url = url + "/" + sort
    if genre is not None:
        url = url + "/" + genre
    else:
        url = url + "/1"

    soup = BeautifulSoup(HTTP.Request(url).content)

    # RegEx to extract out item id.
    # FIX: raw string and escaped "." so "ip.php" only matches a literal dot
    # (the old pattern's bare "." matched any character).
    id_reg_ex = re.compile(r"/ip\.php\?v=(\d+)")

    for item in soup.findAll("a", {'name': 'i'}):
        res = MediaInfo()
        res.type = type

        title_elem = item.nextSibling.nextSibling  # Pick out next element

        # Extract out title, then split off a trailing "(year)" if present.
        res.title = Utils.decode_htmlentities(str(title_elem.string))
        match = re.search(r"(.*)\((\d*)\)", res.title)
        if match:
            res.title = match.group(1).strip()
            res.year = int(match.group(2).strip())

        # Extract out id if available, otherwise, just store the item's URL.
        match = id_reg_ex.search(title_elem['href'])
        if match:
            # NOTE(review): group(0) stores the whole "/ip.php?v=NNN" path,
            # not just the numeric id — kept as-is since the else branch
            # stores a path too and callers appear to expect one.
            res.id = match.group(0)
        else:
            res.id = title_elem['href']

        # Add to item list.
        items.append(res)

    return items
def GetItems(type, genre=None, sort=None, alpha=None, pages=5, start_page=0):
    """
    Scrape up to `pages` listing pages and return the media items found.

    type: site section to list (passed to GetURL and stored on each item).
    genre, sort, alpha: listing filters, passed straight through to GetURL.
    pages: maximum number of pages to fetch.
    start_page: page offset added to the running page number.

    Returns a list of MediaInfo objects.
    """
    page_num = 0
    items = []

    while page_num < pages:
        page_num = page_num + 1

        url = GetURL(type=type, genre=genre, sort=sort, alpha=alpha,
                     page_num=page_num + start_page)
        soup = BeautifulSoup(HTTP.Request(url).content)

        for item in soup.findAll("div", {'class': 'index_item index_item_ie'}):
            res = MediaInfo()
            res.type = type

            # Extract out title ("Watch <title>"), then split off a trailing
            # "(year)" if present.
            res.title = re.search("Watch (.*)", item.find('a')['title']).group(1).strip()
            match = re.search(r"(.*)\((\d*)\)", res.title)
            if match:
                res.title = match.group(1).strip()
                res.year = int(match.group(2).strip())

            # Extract out URL (drop the leading "/").
            res.id = item.a['href'][1:]

            # Extract out thumb (may be a relative path; stored verbatim here).
            res.poster = item.find('img')['src']

            # Extract out rating from the width of the star bar.
            # BUG FIX: the old code called .group(1) before checking whether
            # the search matched, raising AttributeError on styles without a
            # width; also "<>" replaced by "!=".
            rating_style = item.find('li')['style']
            rating_match = re.search(r"width:\s([\d\.]*)px;", rating_style)
            if rating_match is not None and rating_match.group(1) != "":
                res.rating = int(int(rating_match.group(1)) / 10)

            # Add to item list.
            items.append(res)

    return items
def GetItems(type, genre = None, sort = None, alpha = None, pages = 5, start_page = 0):
    """Fetch a single IceFilms index listing and return its MediaInfo items.

    pages/start_page are accepted only so the signature matches the other
    providers; this listing is not paged.
    """
    results = []

    # Build the listing URL from the requested section and filters.
    listing_url = ICEFILMS_URL + "/" + type
    if genre:
        genre = genre.lower()
    if alpha:
        # Alphabetical browse mode replaces both sort and genre.
        sort = "a-z"
        if alpha == "123":
            genre = "1"
        else:
            genre = alpha.upper()
    if sort is not None:
        listing_url = listing_url + "/" + sort
    if genre is not None:
        listing_url = listing_url + "/" + genre
    else:
        listing_url = listing_url + "/1"

    page = BeautifulSoup(HTTP.Request(listing_url).content)

    # Pattern used to pull an item id path out of an href.
    id_pattern = re.compile("/ip.php\?v=(\d+)")

    for anchor in page.findAll("a", { 'name': 'i' }):
        info = MediaInfo()
        info.type = type

        # The title link sits two siblings after the anchor.
        link = anchor.nextSibling.nextSibling

        info.title = Utils.decode_htmlentities(str(link.string))
        year_match = re.search("(.*)\((\d*)\)", info.title)
        if year_match:
            info.title = year_match.group(1).strip()
            info.year = int(year_match.group(2).strip())

        # Prefer the matched id path; fall back to the raw href.
        id_match = id_pattern.search(link['href'])
        if id_match:
            info.id = id_match.group(0)
        else:
            info.id = link['href']

        results.append(info)

    return results
def GetItems(type, genre = None, sort = None, alpha = None, pages = 5, start_page = 0):
    """
    Scrape up to `pages` listing pages and return the media items found,
    resolving relative poster URLs against the listing page's URL.

    type: site section to list (passed to GetURL and stored on each item).
    genre, sort, alpha: listing filters, passed straight through to GetURL.
    pages: maximum number of pages to fetch.
    start_page: page offset added to the running page number.

    Returns a list of MediaInfo objects.
    """
    page_num = 0
    items = []

    while page_num < pages:
        page_num = page_num + 1

        url = GetURL(type = type, genre = genre, sort = sort, alpha = alpha,
                     page_num = page_num + start_page)
        # Parsed once per page so relative poster paths can be resolved.
        url_parts = urlparse.urlparse(url)

        soup = BeautifulSoup(HTTP.Request(url).content)

        for item in soup.findAll("div", { 'class': 'index_item index_item_ie' }):
            res = MediaInfo()
            res.type = type

            # Extract out title, then split off a trailing "(year)".
            res.title = re.search("Watch (.*)", item.find('a')['title']).group(1).strip()
            match = re.search(r"(.*)\((\d*)\)", res.title)
            if match:
                res.title = match.group(1).strip()
                res.year = int(match.group(2).strip())

            # Extract out URL (drop the leading "/").
            res.id = item.a['href'][1:]

            # Extract out thumb. Note that extracted path may be relative.
            poster_url = item.find('img')['src']
            if poster_url.startswith("//"):
                # Protocol-relative URL: prepend the scheme only.
                poster_url = url_parts.scheme + ":" + poster_url
            elif poster_url.startswith("/"):
                # Deal with fully-relative paths. Doesn't deal with partial
                # relative paths.
                poster_url = url_parts.scheme + "://" + url_parts.netloc + poster_url
            res.poster = poster_url

            # Extract out rating from the width of the star bar.
            # BUG FIX: previously .group(1) was taken before verifying the
            # search matched (AttributeError on non-matching styles); "<>"
            # replaced by "!=" as well.
            rating_style = item.find('li')['style']
            rating_match = re.search(r"width:\s([\d\.]*)px;", rating_style)
            if rating_match is not None and rating_match.group(1) != "":
                res.rating = int(int(rating_match.group(1)) / 10)

            # Add to item list.
            items.append(res)

    return items
def GetItems(type, genre = None, sort = None, alpha = None, pages = 5, start_page = 0):
    """
    Scrape up to `pages` listing pages and return the media items found.

    type: site section to list (passed to GetURL and stored on each item).
    genre, sort, alpha: listing filters, passed straight through to GetURL.
    pages: maximum number of pages to fetch.
    start_page: page offset added to the running page number.

    Returns a list of MediaInfo objects.
    """
    page_num = 0
    items = []

    while page_num < pages:
        page_num = page_num + 1

        url = GetURL(type = type, genre = genre, sort = sort, alpha = alpha,
                     page_num = page_num + start_page)
        soup = BeautifulSoup(HTTP.Request(url).content)

        for item in soup.findAll("div", { 'class': 'index_item index_item_ie' }):
            res = MediaInfo()
            res.type = type

            # Extract out title, then split off a trailing "(year)".
            res.title = re.search("Watch (.*)", item.find('a')['title']).group(1).strip()
            match = re.search(r"(.*)\((\d*)\)", res.title)
            if match:
                res.title = match.group(1).strip()
                res.year = int(match.group(2).strip())

            # Extract out URL (drop the leading "/").
            res.id = item.a['href'][1:]

            # Extract out thumb (stored verbatim; may be relative).
            res.poster = item.find('img')['src']

            # Extract out rating from the width of the star bar.
            # BUG FIX: previously .group(1) was taken before verifying the
            # search matched (AttributeError on non-matching styles); "<>"
            # replaced by "!=" as well.
            rating_style = item.find('li')['style']
            rating_match = re.search(r"width:\s([\d\.]*)px;", rating_style)
            if rating_match is not None and rating_match.group(1) != "":
                res.rating = int(int(rating_match.group(1)) / 10)

            # Add to item list.
            items.append(res)

    return items
def GetSearchResults(query=None, type=None, imdb_id=None, exact=False):
    """
    Search IceFilms via a Google site search.

    query: title to search for.
    type: "movies" restricts the search to movie pages; anything else
        searches for TV "Episode List" pages.
    imdb_id: accepted for interface compatibility; unused by this provider.
    exact: when True, only return results whose cleaned title exactly
        matches the query (case-insensitive).

    Returns a list of MediaInfo objects.
    """
    if type == "movies":
        # This a google search. Excluding "Episode List" pages omits all TV shows.
        search = 'intitle:%s -"Episode List" -"Series Rating" site:%s' % (query, ICEFILMS_URL)
    else:
        search = 'allintitle:%s "Episode List" site:%s' % (query, ICEFILMS_URL)

    gs = GoogleSearch(search)
    gs.results_per_page = 25
    gs.page = 0
    results = gs.get_results()

    items = []
    # FIX: loop variable renamed - the original reused "res" for both the
    # Google result and the MediaInfo object, shadowing the former.
    for result in results:
        # Strip Google markup and site boilerplate out of the result title.
        name = re.sub(
            '(<em>|</em>|<a>|</a>|DivX|-|icefilms(\.info)?|<b>\.\.\.</b>|Episode List|links)',
            '',
            result.title.encode('utf8')
        ).strip()

        # FIX: skip results whose URL is not on icefilms.info instead of
        # crashing with AttributeError on a failed match.
        url_match = re.search("icefilms\.info(/.*)", result.url)
        if url_match is None:
            continue
        video_url = url_match.group(1)

        res = MediaInfo()
        res.type = type
        res.title = name
        match = re.search(r"(.*)\((\d*)\)", res.title)
        if match:
            res.title = match.group(1).strip()
            res.year = int(match.group(2).strip())
        res.id = video_url

        # FIX: honour the previously-ignored `exact` flag (mirrors the LMWT
        # GetSearchResults implementation elsewhere in this file).
        if not exact or res.title.lower() == query.lower():
            items.append(res)

    return items
def GetSearchResults(query=None, type=None, imdb_id=None):
    """Search IceFilms through a Google site search and return MediaInfo items.

    Movies use a plain title search that excludes TV "Episode List" pages;
    everything else requires all title words plus an "Episode List" page.
    imdb_id is accepted for interface compatibility but not used.
    """
    if type == "movies":
        search = 'intitle:%s -"Episode List" -"Series Rating" site:%s' % (query,ICEFILMS_URL)
    else:
        search = 'allintitle:%s "Episode List" site:%s' % (query, ICEFILMS_URL)

    engine = GoogleSearch(search)
    engine.results_per_page = 25
    engine.page = 0

    items = []
    for hit in engine.get_results():
        # Strip Google markup and site boilerplate from the result title.
        cleaned = re.sub(
            '(<em>|</em>|<a>|</a>|DivX|-|icefilms(\.info)?|<b>\.\.\.</b>|Episode List|links)',
            '',
            hit.title.encode('utf8')
        ).strip()

        # Path component of the result URL on icefilms.info.
        path = re.search("icefilms\.info(/.*)", hit.url).group(1)

        entry = MediaInfo()
        entry.type = type
        entry.title = cleaned
        year_match = re.search("(.*)\((\d*)\)", entry.title)
        if year_match:
            entry.title = year_match.group(1).strip()
            entry.year = int(year_match.group(2).strip())
        entry.id = path

        items.append(entry)

    return items
def GetMediaInfo(url, mediainfo, query_external=False):
    """
    Retrieve meta data about the passed in LMWT item from a meta provider.
    Additionally, for any info not returned by the meta provider, try to
    collect the info directly from the LMWT item page.

    url: site-relative path of the LMWT item page.
    mediainfo: partially-populated MediaInfo; show_name, season and
        (optionally) ep_num are read from it for the provider lookup.
    query_external: when True, ask the external meta provider for the item's
        details; otherwise only the LMWT page itself is used.

    Returns a MediaInfo object, or None if the page could not be parsed.
    """
    # The description meta header for some shows inserts random double quotes
    # in the content which breaks the parsing of the page. Work around that by
    # simply removing the head section in which the meta elements are contained.
    headMassage = [(re.compile('<head>(.*)</head>', re.S), lambda match: '')]
    soupMassage = copy.copy(BeautifulSoup.MARKUP_MASSAGE)
    soupMassage.extend(headMassage)

    soup = BeautifulSoup(
        HTTP.Request(Dict['LMWT_URL'] + url).content,
        markupMassage=soupMassage)

    # NOTE(review): initialised up front so the final return is safe even if
    # parsing fails part-way through — the flattened original opened a try
    # here whose handler was not visible.
    mediainfo_ret = None

    try:
        # Try to pick the IMDB id off the page; not all pages carry the link.
        imdb_id = None
        try:
            imdb_link = soup.find('div', 'mlink_imdb').a['href']
            imdb_id = re.search("(tt\d+)", str(imdb_link)).group()
        except Exception:
            # FIX: narrowed from a bare "except:". Still best-effort.
            pass

        # Construct kwargs for the external provider lookup.
        kwargs = {}
        kwargs['imdb_id'] = imdb_id
        kwargs['show_name'] = mediainfo.show_name
        kwargs['season'] = mediainfo.season
        if hasattr(mediainfo, 'ep_num'):
            kwargs['ep_num'] = mediainfo.ep_num

        if query_external:
            mediainfo_ret = DBProvider().GetProvider(
                mediainfo.type).RetrieveItemFromProvider(**kwargs)
        else:
            mediainfo_ret = MediaInfo()
            mediainfo_ret.id = imdb_id

        # Also parse the LMWT page and extract out any info not set by the
        # meta provider.
        info_div = soup.find('div', 'movie_info')

        # First, extract out description...
        info = {}
        info['Description:'] = info_div.find('td', {'colspan': '2'}).text

        # Then, ratings....
        info['Rating:'] = info_div.find('li', 'current-rating').text

        # Extract out any other info from the two-column label/value rows.
        for row in info_div.findAll('tr'):
            row_items = row.findAll('td')
            # FIX: "<>" replaced with "!=" (same behaviour).
            if len(row_items) != 2 or "colspan" in str(row_items[0]):
                continue
            info[row_items[0].text] = row_items[1].text

        # Map available extracted info back to the media info object.
        # Each entry maps an LMWT label to [mediainfo attribute, converter].
        item_map = {
            'Description:': ['summary', lambda x: Utils.decode_htmlentities(x)],
            'Air Date:': ['releasedate', lambda x: datetime.strptime(x, '%B %d, %Y')],
            'Runtime:': [
                'duration',
                # Minutes -> milliseconds, clamped to 0 on overflow.
                lambda x: int(re.search("(\d*)", x).group(0)) * 60 * 1000
                    if int(re.search("(\d*)", x).group(0)) * 60 * 1000 < sys.maxint
                    else 0
            ],
            'Rating:': [
                'rating',
                # Site uses a 5-point scale; double it to a 10-point scale.
                lambda x: float(re.search("([\d\.]+)", x).group(0)) * 2
            ],
            # FIX: was a bare "decode_htmlentities" (NameError at call time);
            # qualified with Utils like the Description entry above.
            'Title:': ['title', lambda x: Utils.decode_htmlentities(x)],
        }

        # For each extracted item from LMWT...
        for lmwt_item in info.keys():
            # Look for matching entry in map...
            if lmwt_item not in item_map:
                continue

            mi_item = item_map[lmwt_item]
            if mi_item is None:
                continue

            try:
                # Only fill in attributes the meta provider left empty.
                mi_val = getattr(mediainfo_ret, mi_item[0], None)
                if not mi_val:
                    setattr(mediainfo_ret, mi_item[0], mi_item[1](info[lmwt_item]))
            except Exception:
                # Best-effort per field: a malformed value (e.g. an
                # unparseable date) shouldn't abort the whole item.
                pass
    except Exception:
        # NOTE(review): handler for the outer try (missing from the flattened
        # original) — keep the whole call best-effort and fall through to
        # return whatever was collected.
        pass

    return mediainfo_ret
def GetSearchResults(query=None, type=None, imdb_id=None, exact=False):
    """
    Search the LMWT site and return matching items.

    query: search terms (also used as the title when imdb_id is given).
    type: "tv" searches the TV section; anything else searches movies.
    imdb_id: when set, skip searching and return a single item pointing at
        the site's IMDB lookup page.
    exact: when True, only return results whose title exactly matches the
        query (case-insensitive).

    Returns a list of MediaInfo objects.
    """
    items = []

    if imdb_id:
        # Direct IMDB lookup - no search needed.
        res = MediaInfo()
        res.type = type
        res.id = "/item.php?imdb=" + imdb_id
        res.title = query
        items.append(res)
    else:
        # Fetch the search page uncached to pick up the hidden form key.
        soup = BeautifulSoup(
            HTTP.Request(Dict['LMWT_SEARCH_URL'] + "?search", cacheTime=0).content)
        key = soup.find('input', {'type': 'hidden', 'name': 'key'})['value']

        section = "1"
        if type == "tv":
            section = "2"

        url = Dict[
            'LMWT_SEARCH_URL'] + "?search_section=" + section + "&search_keywords=" + urllib.quote_plus(
                query) + "&key=" + key + "&sort=views"

        soup = BeautifulSoup(HTTP.Request(url, cacheTime=0).content)

        for item in soup.findAll("div", {'class': 'index_item index_item_ie'}):
            res = MediaInfo()
            res.type = type

            # Extract out title, then split off a trailing "(year)".
            res.title = re.search("Watch (.*)", item.find('a')['title']).group(1).strip()
            match = re.search(r"(.*)\((\d*)\)", res.title)
            if match:
                res.title = match.group(1).strip()
                res.year = int(match.group(2).strip())

            # Extract out URL (drop the leading "/").
            res.id = item.a['href'][1:]

            # Extract out thumb
            res.poster = item.find('img')['src']

            # Extract out rating from the star-bar width.
            # BUG FIX: the old pattern "width:\s(\d)*px;" repeated a one-digit
            # group, so group(1) captured only the LAST digit of the width;
            # (\d*) captures the whole number. Also guard against styles that
            # don't match instead of raising AttributeError.
            rating_style = item.find('li')['style']
            rating_match = re.search(r"width:\s(\d*)px;", rating_style)
            if rating_match:
                res.rating = rating_match.group(1)

            # Add to item list (honouring the exact-match filter).
            if not exact or res.title.lower() == query.lower():
                items.append(res)

    return items
def GetSearchResults(query=None, type=None, imdb_id=None, exact=False):
    """
    Search the LMWT site and return matching items.

    query: search terms (also used as the title when imdb_id is given).
    type: "tv" searches the TV section; anything else searches movies.
    imdb_id: when set, skip searching and return a single item pointing at
        the site's IMDB lookup page.
    exact: when True, only return results whose title exactly matches the
        query (case-insensitive).

    Returns a list of MediaInfo objects.
    """
    items = []

    if imdb_id:
        # Direct IMDB lookup - no search needed.
        res = MediaInfo()
        res.type = type
        res.id = "/item.php?imdb=" + imdb_id
        res.title = query
        items.append(res)
    else:
        # Fetch the search page uncached to pick up the hidden form key.
        soup = BeautifulSoup(HTTP.Request(Dict['LMWT_SEARCH_URL'] + "?search", cacheTime=0).content)
        key = soup.find('input', { 'type': 'hidden', 'name': 'key' })['value']

        section = "1"
        if type == "tv":
            section = "2"

        url = Dict['LMWT_SEARCH_URL'] + "?search_section=" + section + "&search_keywords=" + urllib.quote_plus(query) + "&key=" + key + "&sort=views"
        soup = BeautifulSoup(HTTP.Request(url, cacheTime=0).content)

        for item in soup.findAll("div", { 'class': 'index_item index_item_ie' }):
            res = MediaInfo()
            res.type = type

            # Extract out title, then split off a trailing "(year)".
            res.title = re.search("Watch (.*)", item.find('a')['title']).group(1).strip()
            match = re.search(r"(.*)\((\d*)\)", res.title)
            if match:
                res.title = match.group(1).strip()
                res.year = int(match.group(2).strip())

            # Extract out URL (drop the leading "/").
            res.id = item.a['href'][1:]

            # Extract out thumb
            res.poster = item.find('img')['src']

            # Extract out rating from the star-bar width.
            # BUG FIX: the old pattern "width:\s(\d)*px;" repeated a one-digit
            # group, so group(1) captured only the LAST digit of the width;
            # (\d*) captures the whole number. Also guard against styles that
            # don't match instead of raising AttributeError.
            rating_style = item.find('li')['style']
            rating_match = re.search(r"width:\s(\d*)px;", rating_style)
            if rating_match:
                res.rating = rating_match.group(1)

            # Add to item list (honouring the exact-match filter).
            if not exact or res.title.lower() == query.lower():
                items.append(res)

    return items