Example #1
0
 def _parse_series_page(self, name, url):
     """Scrape one TheTVDB series page into an Entity and enqueue it.
     
     Skips placeholder/duplicate listings, then pulls the show's title,
     description, banner image, air info, cast, and season/air-date range
     from the page HTML, merges mpaa_rating/imdb_id from the TheTVDB API
     lookup, and puts the finished Entity onto self._output.
     
     Params:
         name -- the series' display name (used only for filtering/logging)
         url  -- absolute URL of the series page to download and parse
     """
     # ignore placeholder ('**'), duplicate, and '.hack'-prefixed listings
     if '**' in name or 'DUPLICATE' in name or name.startswith('.hack'):
         return
     
     utils.log('[%s] parsing page %s (%s)' % (self, name, url))
     
     try:
         soup = utils.getSoup(url)
     except Exception:
         # narrowed from a bare except: so KeyboardInterrupt/SystemExit propagate
         utils.printException()
         utils.log("[%s] error downloading page %s (%s)" % (self, name, url))
         return
     
     contents = soup.findAll('div', {'id' : 'content'})
     header = contents[0]
     
     # the <h1> holds the title; extract it so header.getText() below
     # yields only the description text
     h1 = header.find('h1')
     title = h1.getText()
     h1.extract()
     
     entity = Entity()
     
     # parse basic show info
     entity.title = title
     entity.subcategory = 'tv'
     
     desc = header.getText().replace('\r\n', '\n')
     if len(desc) > 5:
         # skip trivially short (effectively empty) descriptions
         entity.desc = desc
     
     entity.sources.thetvdb_id = self._id_re.match(url).groups()[0]
     
     # parse images -- prefer posters, then fanart, then graphical banners
     images = map(lambda img: img.get('src'), soup.findAll('img', {'class' : 'banner'}))
     types  = [ 'posters', 'fanart', 'graphical', ]
     
     for image_type in types:
         filtered_images = filter(lambda img: image_type in img, images)
         if len(filtered_images) > 0:
             entity.image = "%s%s" % (self.base, filtered_images[0])
             break
     
     info = contents[1].find('table').find('table')
     rows = info.findAll('tr')
     
     # parse detailed show info; maps info-table row index -> entity attribute
     info_map = {
         0 : 'original_release_date', 
         3 : 'air_time', 
         4 : 'network_name', 
         5 : 'genre', 
     }
     
     for k, k2 in info_map.iteritems():
         try:
             value = rows[k].findAll('td')[1].getText()
             if len(value) > 0:
                 entity[k2] = value
         except Exception:
             # row layout varies between shows; log and keep the other fields
             utils.printException()
     
     # parse cast (best-effort: the cast page may be missing or malformed)
     try:
         actors = "%s%s" % (self.base, contents[-1].findAll('a')[-1].get('href'))
         actors_soup = utils.getSoup(actors)
         
         infotables = actors_soup.findAll('table', {'class' : 'infotable'})
         cast = []
         
         for infotable in infotables:
             text = infotable.find('td').getText(separator='___')
             match = self._actor_re.match(text)
             if match is not None:
                 groups = match.groups()
                 cast.append('%s as %s' % (groups[0].strip(), groups[1].strip()))
                 # TODO: record actor images
         
         if len(cast) > 0:
             entity.cast = ', '.join(cast)
     except Exception:
         # deliberately silent: cast is optional
         pass
     
     # parse seasons
     try:
         seasons = "%s%s" % (self.base, contents[2].findAll('a')[-1].get('href'))
         seasons_soup = utils.getSoup(seasons)
         
         # skip the header row of the episode listing
         rows = seasons_soup.find('table', {'id' : 'listtable'}).findAll('tr')[1:]
         
         highest_season = -1
         earliest = None
         latest   = None
         
         # each row is an episode; loop through each episode, recording the 
         # earliest and latest air date for the show overall and the number 
         # of seasons the show ran for.
         for row in rows:
             tds = row.findAll('td')
             episode = tds[0].getText()
             match = self._season_re.match(episode)
             
             if match is not None:
                 groups  = match.groups()
                 season  = int(groups[0])
                 episode = int(groups[1])
                 
                 if season > highest_season:
                     highest_season = season
                 
                 date  = tds[2].getText()
                 match = self._date_re.match(date)
                 
                 if match is not None:
                     year, month, day = match.groups()
                     date = datetime(year=int(year), month=int(month), day=int(day))
                     
                     if earliest is None or date < earliest:
                         earliest = date
                     
                     if latest is None or date > latest:
                         latest = date
         
         if highest_season > 0:
             entity.num_seasons = highest_season
         
         if earliest is not None:
             entity.earliest_air_date = earliest
         
         if latest is not None:
             entity.latest_air_date = latest
     except Exception:
         utils.printException()
     
     # merge supplemental data (rating, IMDb id) from the TheTVDB API lookup
     entity2 = self._thetvdb.lookup(entity.sources.thetvdb_id)
     
     if entity2 is not None:
         if entity2.mpaa_rating is not None:
             entity.mpaa_rating = entity2.mpaa_rating
         if entity2.imdb_id is not None:
             entity.imdb_id     = entity2.imdb_id
     
     self._output.put(entity)
Example #2
0
 def _parse_dump(self, filename):
     """Stream-parse a gzipped XML catalog dump into movie/TV Entities.
     
     Iterates 'catalog_title' elements with lxml's iterparse so the whole
     dump is never held in memory, skips bonus-material and non-English
     titles, and puts each parsed Entity onto self._output.
     
     Params:
         filename -- path to the gzip-compressed XML dump file
     """
     f = gzip.open(filename, 'rb')
     context = iter(etree.iterparse(f, events=("start", "end")))
     
     # grab the root element up front so we can clear processed children
     # from it as we go, keeping memory usage bounded
     event, root = context.next()
     
     # extracts the trailing numeric id from a '.../<digits>' URL
     nid_re              = re.compile('.*\/([0-9]*)$')
     language_re         = re.compile('.*\/languages$')
     match_genre_re      = re.compile('.*\/genres$')
     match_ratings_re    = re.compile('.*\/mpaa_ratings$')
     
     # predicates classifying a <category> element by its 'scheme' URL suffix
     match_genre_func    = lambda c: re.match(match_genre_re, c.get('scheme')) is not None
     match_ratings_func  = lambda c: re.match(match_ratings_re, c.get('scheme')) is not None
     match_language_func = lambda c: re.match(language_re, c.get('scheme')) is not None
     
     count = 0
     # ids of titles referenced as bonus material; used to skip those
     # titles when they later appear as top-level catalog entries.
     # NOTE(review): assumes a bonus_materials reference appears before the
     # referenced title's own catalog_title element -- confirm dump ordering.
     bonus_materials = set()
     
     # loop through each XML catalog_title element and parse it as a movie Entity
     for event, elem in context:
         if event == "end" and elem.tag == "catalog_title":
             root.clear()
             
             try:
                 # titles without an average rating are skipped entirely
                 rating_elem = elem.find('average_rating')
                 if rating_elem is None:
                     continue
                 
                 entity = Entity()
                 nid = elem.find('id').text
                 nid = int(re.match(nid_re, nid).groups()[0])
                 
                 # record the ids of any bonus materials this title links to
                 bonus_materials_elem = elem.find('.//bonus_materials')
                 if bonus_materials_elem is not None:
                     links = map(lambda l: l.get('href'), bonus_materials_elem.findall('link'))
                     
                     for link in links:
                         bonus_material_id = int(re.match(nid_re, link).groups()[0])
                         #bonus_material_id = re.match(bonus_materials_id_re, link).groups()[0]
                         bonus_materials.add(bonus_material_id)
                 
                 # skip titles that are themselves bonus material
                 if nid in bonus_materials:
                     continue
                 
                 title = elem.find('title').get('regular')
                 titlel = title.lower()
                 
                 if 'bonus material' in titlel:
                     continue
                 
                 entity.title = title
                 entity.nid = nid
                 entity.desc = elem.find('.//synopsis').text
                 entity.nrating = float(rating_elem.text)
                 
                 categories = elem.findall('category')
                 
                 genres = map(lambda c: c.get('label'), filter(match_genre_func, categories))
                 entity.ngenres = genres
                 
                 # classify as TV if any genre label mentions 'tv', else movie
                 tv = False
                 for genre in genres:
                     if 'tv' in genre.lower():
                         tv = True
                         break
                 
                 if tv:
                     entity.subcategory = 'tv'
                 else:
                     entity.subcategory = 'movie'
                 
                 # only take the MPAA rating when it is unambiguous
                 ratings = map(lambda c: c.get('label'), filter(match_ratings_func, categories))
                 if 1 == len(ratings):
                     entity.mpaa_rating = ratings[0]
                 
                 # box art comes in 3 or 4 sizes: tiny, small, large[, hd]
                 images = elem.find('.//box_art').findall('link')
                 if 3 == len(images) or 4 == len(images):
                     entity.tiny  = images[0].get('href')
                     entity.small = images[1].get('href')
                     entity.large = images[2].get('href')
                     
                     if 4 == len(images):
                         entity.hd = images[3].get('href')
                 
                 links = filter(lambda l: 'web page' == l.get('title'), elem.findall('link'))
                 if 1 == len(links):
                     entity.nurl = links[0].get('href')
                 
                 language_elem  = elem.find('.//languages_and_audio')
                 language_elems = filter(match_language_func, language_elem.findall('.//category'))
                 
                 release_year_elem = elem.find('release_year')
                 if release_year_elem is not None:
                     entity.original_release_date = release_year_elem.text
                 
                 duration = elem.find('runtime')
                 if duration is not None:
                     entity.track_length = duration.text
                 
                 # only keep titles that offer English language/audio
                 languages = set()
                 for elem2 in language_elems:
                     languages.add(elem2.get('label').lower())
                 
                 if 'english' not in languages:
                     continue
                 
                 #utils.log(entity.title)
                 #pprint(entity.getDataAsDict())
                 
                 """
                 self._globals['n'] = elem
                 self._globals['s'] = etree.tostring(elem, pretty_print=True)
                 self._globals['e'] = entity
                 break
                 """
                 
                 self._output.put(entity)
                 count += 1
                 
                 # give the downstream consumer threads an occasional chance to work
                 if 0 == (count % 512):
                     time.sleep(0.1)
                 
                 # free this element's memory; NOTE(review): skipped when a
                 # 'continue' or an exception fires above, so malformed or
                 # filtered records are only reclaimed by the next root.clear()
                 elem.clear()
             except Exception, e:
                 utils.printException()
                 utils.log(elem.find('title').get('regular'))