Example #1
 def _parseRestaurantPage(self, pool, region_name, city_name, restaurant_name, href):
     utils.log("[%s] parsing restaurant '%s.%s.%s' (%s)" % (self, region_name, city_name, restaurant_name, href))
     
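     # fetch and soup-parse the page; give up on this restaurant if the request fails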
     try:
         soup = utils.getSoup(href)
     except Exception:
         utils.printException()
         utils.log("[%s] error downloading page %s" % (self, href))
         return
     
     # parse the address for the current restaurant
     addr   = soup.find('div', {'class' : 'address'})
     street = addr.find('span', {'class' : 'street'}).getText().strip()
     geo    = addr.find('span', {'class' : 'geo'}).getText().strip()
     
     address = "%s, %s" % (street, geo)
     
     # add the current restaurant to the output for this crawler
     entity = Entity()
     entity.subcategory = "restaurant"
     entity.title   = restaurant_name
     entity.address = address
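     # record the absolute Zagat URL so the entity can be traced back to its source page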
     entity.sources.zagat = {
         'zurl' : self.base + href, 
     }
     
     # parse cuisine
     header = soup.find('div', {'id' : "block-zagat_restaurants-14"})
     if header is not None:
         header = header.find('ul').find('li', {'class' : 'first'})
         
         if header is not None:
             entity.cuisine = header.getText()
     
     # parse website
     site = soup.find('span', {'class' : 'website'})
     if site is not None:
         site = site.find('a')
         
         if site is not None:
             entity.site = site.get('href')
     
     # parse preview image
     img = soup.find('div', {'id' : 'content'}).find('div', {'class' : 'photo'})
     if img is not None:
         img = img.find('img')
         
         if img is not None:
             entity.image = img.get('src')
     
     self._output.put(entity)
Example #2
 def _parse_dump(self, filepath):
     f = gzip.open(filepath, 'rb')
     context = iter(etree.iterparse(f, events=("start", "end")))
     
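     # grab a reference to the root element so it can be cleared as parsing progresses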
     event, root = context.next()
     offset = 0
     count  = 0
     
     # loop through XML and parse each product element as a book Entity
     for event, elem in context:
         if event == "end" and elem.tag == "product" and elem.get('product_id') is not None:
             root.clear()
             
             if offset < Globals.options.offset:
                 offset += 1
                 continue
             
             if Globals.options.limit and count >= Globals.options.limit:
                 break
             
             try:
                 # disabled sanity checks: category should be 'books', price in USD, retail price non-negative
                 
                 entity = Entity()
                 entity.subcategory  = "book"
                 
                 entity.title        = elem.get('name')
                 entity.bid          = int(elem.get('product_id'))
                 entity.sku_number   = elem.get('sku_number')
                 entity.image        = elem.find('.//productImage').text
                 
                 entity.author       = elem.find('.//Author').text
                 entity.publisher    = elem.find('.//Publisher').text
                 entity.publish_date = elem.find('.//Publish_Date').text
                 isbn = elem.find('.//ISBN').text
                 
                 if isbn is None or len(isbn) <= 0:
                     continue
                 
                 entity.isbn         = isbn
                 
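                 # crude language filter: keep only items whose description contains "nglish" (matches both "English" and "english")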
                 desc = elem.find('description')
                 is_english = 'nglish' in etree.tostring(desc)
                 
                 if not is_english:
                     continue
                 
                 self._output.put(entity)
                 count += 1
                 
                 # give the downstream consumer threads an occasional chance to work
                 if count % 512 == 0:
                     time.sleep(0.1)
                 
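                 # detach already-processed siblings so lxml can reclaim their memory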
                 parent = elem.getparent()
                 while True:
                     prev = elem.getprevious()
                     if prev is None:
                         break
                     parent.remove(prev)
                 
                 elem.clear()
             except Exception:
                 utils.printException()
Example #3
 def _parse_series_page(self, name, url):
     if '**' in name or 'DUPLICATE' in name or name.startswith('.hack'):
         return
     
     utils.log('[%s] parsing page %s (%s)' % (self, name, url))
     
     try:
         soup = utils.getSoup(url)
     except Exception:
         utils.printException()
         utils.log("[%s] error downloading page %s (%s)" % (self, name, url))
         return
     
     contents = soup.findAll('div', {'id' : 'content'})
     header = contents[0]
     
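     # lift the show title out of the header; removing the h1 leaves just the description text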
     h1 = header.find('h1') 
     title = h1.getText()
     h1.extract()
     
     entity = Entity()
     
     # parse basic show info
     entity.title = title
     entity.subcategory = 'tv'
     
     desc = header.getText().replace('\r\n', '\n')
     if len(desc) > 5:
         entity.desc = desc
     
     entity.sources.thetvdb_id = self._id_re.match(url).groups()[0]
     
     # parse images
     images = map(lambda img: img.get('src'), soup.findAll('img', {'class' : 'banner'}))
     types  = [ 'posters', 'fanart', 'graphical', ]
     
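     # prefer poster art, then fanart, then the generic banner graphic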
     for image_type in types:
         filtered_images = filter(lambda img: image_type in img, images)
         if len(filtered_images) > 0:
             entity.image = "%s%s" % (self.base, filtered_images[0])
             break
     
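     # the show's detail rows live in a table nested one level deep within the second content block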
     info = contents[1].find('table').find('table')
     rows = info.findAll('tr')
     
     # parse detailed show info
     info_map = {
         0 : 'original_release_date', 
         3 : 'air_time', 
         4 : 'network_name', 
         5 : 'genre', 
     }
     
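     # copy each mapped table cell onto the entity, tolerating rows that are missing or empty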
     for k, k2 in info_map.iteritems():
         try:
             value = rows[k].findAll('td')[1].getText()
             if len(value) > 0:
                 entity[k2] = value
         except Exception:
             utils.printException()
     
     # parse cast
     try:
         actors = "%s%s" % (self.base, contents[-1].findAll('a')[-1].get('href'))
         actors_soup = utils.getSoup(actors)
         
         infotables = actors_soup.findAll('table', {'class' : 'infotable'})
         cast = []
         
         for infotable in infotables:
             text = infotable.find('td').getText(separator='___')
             match = self._actor_re.match(text)
             if match is not None:
                 groups = match.groups()
                 cast.append('%s as %s' % (groups[0].strip(), groups[1].strip()))
                 # TODO: record actor images
         
         if len(cast) > 0:
             entity.cast = ', '.join(cast)
     except Exception:
         # cast details are optional; ignore any failure here
         pass
     
     # parse seasons
     try:
         seasons = "%s%s" % (self.base, contents[2].findAll('a')[-1].get('href'))
         seasons_soup = utils.getSoup(seasons)
         
         rows = seasons_soup.find('table', {'id' : 'listtable'}).findAll('tr')[1:]
         
         highest_season = -1
         earliest = None
         latest   = None
         
         # each row is an episode; loop through each episode, recording the 
         # earliest and latest air date for the show overall and the number 
         # of seasons the show ran for.
         for row in rows:
             tds = row.findAll('td')
             episode = tds[0].getText()
             match = self._season_re.match(episode)
             
             if match is not None:
                 groups  = match.groups()
                 season  = int(groups[0])
                 episode = int(groups[1])
                 
                 if season > highest_season:
                     highest_season = season
                 
                 date  = tds[2].getText()
                 match = self._date_re.match(date)
                 
                 if match is not None:
                     year, month, day = match.groups()
                     date = datetime(year=int(year), month=int(month), day=int(day))
                     
                     if earliest is None or date < earliest:
                         earliest = date
                     
                     if latest is None or date > latest:
                         latest = date
         
         if highest_season > 0:
             entity.num_seasons = highest_season
         
         if earliest is not None:
             entity.earliest_air_date = earliest
         
         if latest is not None:
             entity.latest_air_date = latest
     except Exception:
         utils.printException()
     
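     # enrich the entity with the MPAA rating and IMDb id from a secondary thetvdb lookup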
     entity2 = self._thetvdb.lookup(entity.sources.thetvdb_id)
     
     if entity2 is not None:
         if entity2.mpaa_rating is not None:
             entity.mpaa_rating = entity2.mpaa_rating
         if entity2.imdb_id is not None:
             entity.imdb_id     = entity2.imdb_id
     
     self._output.put(entity)