Python Entity.nurl примеры использования

Язык программирования: Python

Пространство имен/Пакет: Schemas

Класс/Тип: Entity

Метод/Функция: nurl

Примеров на hotexamples.com: 1

Python Entity.nurl - 1 пример найден. Это лучшие примеры Python кода для Schemas.Entity.nurl, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

subcategory(24)

title(21)

address(16)

desc(4)

author(3)

image(3)

site(2)

phone(2)

factual(2)

mpaa_rating(2)

publisher(2)

yrating(1)

original_release_date(1)

publish_date(1)

seattletimes(1)

small(1)

sku_number(1)

vicinity(1)

nytimes(1)

subtitle(1)

tiny(1)

yreviews(1)

titlel(1)

track_length(1)

openTable(1)

nrating(1)

nymag(1)

hoursOfOperation(1)

awardAnnals(1)

bid(1)

cast(1)

cuisine(1)

earliest_air_date(1)

entity_id(1)

googleLocal(1)

hd(1)

imdb_id(1)

nurl(1)

isbn(1)

large(1)

lat(1)

latest_air_date(1)

lng(1)

ngenres(1)

nid(1)

num_seasons(1)

yurl(1)

Пример #1

Показать файл

Файл: NetflixDump.py Проект: Stamped/Stamped

 def _parse_dump(self, filename):
     """Stream a gzipped XML dump and emit one Entity per usable title.

     The file is parsed incrementally with ``etree.iterparse``; every
     completed ``catalog_title`` element is converted into an ``Entity``
     and handed to ``self._output.put``. An element is skipped when:

     * it has no ``average_rating`` child,
     * its numeric id was listed as a bonus material by an element seen
       earlier (or by itself), or its title contains "bonus material",
     * none of its audio languages is labelled "english".

     Any exception raised while converting a single element is reported
     through ``utils.printException`` / ``utils.log`` and that element is
     dropped; parsing continues with the next one.

     Args:
         filename: path of the gzip-compressed XML dump to read.
     """
     # Raw strings keep the patterns byte-identical while avoiding the
     # invalid '\/' string escape.
     nid_re           = re.compile(r'.*\/([0-9]*)$')
     language_re      = re.compile(r'.*\/languages$')
     match_genre_re   = re.compile(r'.*\/genres$')
     match_ratings_re = re.compile(r'.*\/mpaa_ratings$')

     def labels_with_scheme(scheme_re, elems):
         # Labels of the elements whose 'scheme' attribute matches scheme_re.
         # Built eagerly so len() and indexing work on Python 2 and 3 alike.
         return [c.get('label') for c in elems
                 if scheme_re.match(c.get('scheme')) is not None]

     count = 0
     # ids referenced as bonus material by any element processed so far
     bonus_materials = set()

     f = gzip.open(filename, 'rb')
     try:
         context = iter(etree.iterparse(f, events=("start", "end")))

         # The first event yields the document root; it is kept so the
         # already-processed children can be released while streaming.
         event, root = next(context)

         # loop through each XML catalog_title element and parse it as a movie Entity
         for event, elem in context:
             if event != "end" or elem.tag != "catalog_title":
                 continue

             # Drop references to previously parsed children to bound memory.
             root.clear()

             try:
                 rating_elem = elem.find('average_rating')
                 if rating_elem is None:
                     continue

                 entity = Entity()
                 # The numeric id is the trailing path component of <id>.
                 nid = int(nid_re.match(elem.find('id').text).group(1))

                 bonus_materials_elem = elem.find('.//bonus_materials')
                 if bonus_materials_elem is not None:
                     for link in bonus_materials_elem.findall('link'):
                         href = link.get('href')
                         bonus_materials.add(int(nid_re.match(href).group(1)))

                 if nid in bonus_materials:
                     continue

                 title = elem.find('title').get('regular')
                 titlel = title.lower()

                 if 'bonus material' in titlel:
                     continue

                 entity.title = title
                 entity.nid = nid
                 entity.desc = elem.find('.//synopsis').text
                 entity.nrating = float(rating_elem.text)

                 categories = elem.findall('category')

                 genres = labels_with_scheme(match_genre_re, categories)
                 entity.ngenres = genres

                 # Any genre label mentioning "tv" marks the title as a TV show.
                 if any('tv' in genre.lower() for genre in genres):
                     entity.subcategory = 'tv'
                 else:
                     entity.subcategory = 'movie'

                 # Only trust the MPAA rating when it is unambiguous.
                 ratings = labels_with_scheme(match_ratings_re, categories)
                 if 1 == len(ratings):
                     entity.mpaa_rating = ratings[0]

                 # Box art links are taken positionally: tiny, small, large[, hd].
                 images = elem.find('.//box_art').findall('link')
                 if 3 == len(images) or 4 == len(images):
                     entity.tiny  = images[0].get('href')
                     entity.small = images[1].get('href')
                     entity.large = images[2].get('href')

                     if 4 == len(images):
                         entity.hd = images[3].get('href')

                 links = [l for l in elem.findall('link')
                          if 'web page' == l.get('title')]
                 if 1 == len(links):
                     entity.nurl = links[0].get('href')

                 language_elem = elem.find('.//languages_and_audio')
                 language_labels = labels_with_scheme(
                     language_re, language_elem.findall('.//category'))

                 release_year_elem = elem.find('release_year')
                 if release_year_elem is not None:
                     entity.original_release_date = release_year_elem.text

                 duration = elem.find('runtime')
                 if duration is not None:
                     entity.track_length = duration.text

                 languages = set(label.lower() for label in language_labels)
                 if 'english' not in languages:
                     continue

                 self._output.put(entity)
                 count += 1

                 # give the downstream consumer threads an occasional chance to work
                 if 0 == (count % 512):
                     time.sleep(0.1)

                 elem.clear()
             except Exception:
                 # Best effort: report the failure and move on to the next
                 # element. The title lookup is guarded so that a record
                 # without a <title> cannot make the handler itself raise
                 # and abort the whole dump.
                 utils.printException()
                 title_elem = elem.find('title')
                 if title_elem is not None:
                     utils.log(title_elem.get('regular'))
     finally:
         # Always release the gzip handle, even if parsing blows up.
         f.close()