def to_metadata(self, log, entry):  # {{{
    """Build a Metadata object from one search-result node.

    The title, author text, OZON id and cover URL are read from ``entry``
    with XPath queries; the rating comes from ``self.get_rating``.
    No publication date is set by this method.

    :param log: logger object; not used by this method.
    :param entry: node exposing ``xpath()`` that holds one search result.
    :return: the populated Metadata instance.
    """
    title = unicode(entry.xpath(
        u'normalize-space(.//span[@itemprop="name"][1]/text())'))
    author = unicode(entry.xpath(
        u'normalize-space(.//a[contains(@href, "person")][1]/text())'))

    # The author text is split on commas and every piece is normalized
    # on its own.
    norm_authors = [_normalizeAuthorNameWithInitials(name.strip())
                    for name in author.split(u',')]
    mi = Metadata(title, norm_authors)

    # The id is the part of the detail link href between "id/" and the
    # following "/".
    ozon_id = entry.xpath(
        u'substring-before(substring-after(normalize-space('
        u'.//a[starts-with(@href, "/context/detail/id/")][1]/@href), "id/"), "/")')
    if ozon_id:
        mi.identifiers = {'ozon': ozon_id}

    mi.ozon_cover_url = None
    cover = entry.xpath(u'normalize-space(.//img[1]/@src)')
    if cover:
        mi.ozon_cover_url = _translateToBigCoverUrl(cover)

    mi.rating = self.get_rating(entry)
    return mi
def to_metadata(self, log, entry):  # {{{
    """Build a Metadata object from one entry node.

    Every field is the whitespace-normalized text of the direct child of
    ``entry`` whose local name matches: ``Name``, ``Author``, ``ID``,
    ``Annotation``, ``Picture``, ``Year`` and ``ClientRatingValue``.

    :param log: logger object, passed on to ``toPubdate``.
    :param entry: node exposing ``xpath()``.
    :return: the populated Metadata instance.
    """
    xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'

    def child_text(local_name):
        # Text of the child element with the given local name.
        return entry.xpath(xp_template.format(local_name))

    title = child_text('Name')
    author = child_text('Author')
    # The author text is split on commas and every piece is normalized
    # on its own.
    norm_authors = [_normalizeAuthorNameWithInitials(name.strip())
                    for name in unicode(author).split(u',')]
    mi = Metadata(title, norm_authors)

    mi.identifiers = {'ozon': child_text('ID')}
    mi.comments = child_text('Annotation')

    mi.ozon_cover_url = None
    cover = child_text('Picture')
    if cover:
        mi.ozon_cover_url = _translateToBigCoverUrl(cover)

    pub_year = child_text('Year')
    if pub_year:
        mi.pubdate = toPubdate(log, pub_year)

    rating = child_text('ClientRatingValue')
    if rating:
        try:
            # Original note: calibre's rating is a float between 0 and 10
            # while OZON rates N of 5; the value is stored unscaled (the
            # note mentions a suspected bug in identify).
            mi.rating = float(rating)
        except (TypeError, ValueError):
            # Rating text is not a number: keep whatever rating the
            # Metadata object already has.
            pass
    return mi
def get_metadata_from_detail(self, log, entry, title, authors, identifiers):  # {{{
    """Build a Metadata object from a detail-page node.

    The ``title``, ``authors`` and ``identifiers`` arguments are not read:
    every value is extracted again from ``entry``.

    :param log: logger object; receives a debug message when no rating
        is found.
    :param entry: node exposing ``xpath()``.
    :return: the populated Metadata instance.
    """
    page_title = unicode(entry.xpath(
        u'normalize-space(.//h1[@itemprop="name"][1]/text())'))
    author_text = unicode(entry.xpath(
        u'normalize-space(.//a[contains(@href, "person")][1]/text())'))

    # Comma-separated author text -> list of normalized names.
    author_names = []
    for raw_name in author_text.split(u','):
        author_names.append(_normalizeAuthorNameWithInitials(raw_name.strip()))
    mi = Metadata(page_title, author_names)

    # The id is the part of the detail link href between "id/" and the
    # following "/".
    ozon_id = entry.xpath(
        u'substring-before(substring-after(normalize-space('
        u'.//a[starts-with(@href, "/context/detail/id/")][1]/@href), "id/"), "/")')
    if ozon_id:
        mi.identifiers = {'ozon': ozon_id}

    mi.ozon_cover_url = None
    cover_src = entry.xpath(u'normalize-space(.//img[1]/@src)')
    if cover_src:
        mi.ozon_cover_url = _translateToBigCoverUrl(cover_src)

    mi.rating = self.get_rating(entry)
    if not mi.rating:
        log.debug('No rating (from_detail) found. ozon_id:%s' % ozon_id)
    return mi
def to_metadata(self, log, entry):  # {{{
    """Convert one entry node into a Metadata object.

    Fields are read as the whitespace-normalized text of the direct
    children of ``entry`` named ``Name``, ``Author``, ``ID``,
    ``Annotation``, ``Picture``, ``Year`` and ``ClientRatingValue``
    (matched by local name).

    :param log: logger object, passed on to ``toPubdate``.
    :param entry: node exposing ``xpath()``.
    :return: the populated Metadata instance.
    """
    xp_template = 'normalize-space(./*[local-name() = "{0}"]/text())'

    title = entry.xpath(xp_template.format('Name'))
    author = entry.xpath(xp_template.format('Author'))
    # Split the author text on commas and normalize each piece.
    norm_authors = [_normalizeAuthorNameWithInitials(name.strip())
                    for name in unicode(author).split(u',')]
    mi = Metadata(title, norm_authors)

    ozon_id = entry.xpath(xp_template.format('ID'))
    mi.identifiers = {'ozon': ozon_id}

    mi.comments = entry.xpath(xp_template.format('Annotation'))

    mi.ozon_cover_url = None
    cover = entry.xpath(xp_template.format('Picture'))
    if cover:
        mi.ozon_cover_url = _translateToBigCoverUrl(cover)

    pub_year = entry.xpath(xp_template.format('Year'))
    if pub_year:
        mi.pubdate = toPubdate(log, pub_year)

    rating = entry.xpath(xp_template.format('ClientRatingValue'))
    if rating:
        try:
            # Original note: calibre's rating is a float between 0 and 10
            # while OZON rates N of 5; the value is stored unscaled (the
            # note mentions a suspected bug in identify).
            mi.rating = float(rating)
        except (TypeError, ValueError):
            # Only conversion failures are ignored; the rating then keeps
            # whatever value the Metadata object already has.
            pass
    return mi
def to_metadata(self, log, entry):  # {{{
    """Convert one search-result node into a Metadata object.

    Title, author text, OZON id and cover URL are extracted from ``entry``
    with XPath queries and the rating is obtained from ``self.get_rating``.
    This method does not set a publication date.

    :param log: logger object; not used by this method.
    :param entry: node exposing ``xpath()`` that holds one search result.
    :return: the populated Metadata instance.
    """
    title = unicode(entry.xpath(
        u'normalize-space(.//span[@itemprop="name"][1]/text())'))
    author = unicode(entry.xpath(
        u'normalize-space(.//a[contains(@href, "person")][1]/text())'))

    # Split the author text on commas and normalize each piece.
    norm_authors = [_normalizeAuthorNameWithInitials(part.strip())
                    for part in author.split(u',')]
    mi = Metadata(title, norm_authors)

    # The id is the href text between "id/" and the following "/".
    ozon_id = entry.xpath(
        u'substring-before(substring-after(normalize-space('
        u'.//a[starts-with(@href, "/context/detail/id/")][1]/@href), "id/"), "/")')
    if ozon_id:
        mi.identifiers = {'ozon': ozon_id}

    mi.ozon_cover_url = None
    cover = entry.xpath(u'normalize-space(.//img[1]/@src)')
    if cover:
        mi.ozon_cover_url = _translateToBigCoverUrl(cover)

    mi.rating = self.get_rating(entry)
    return mi
def get_metadata_from_detail(self, log, entry, title, authors, identifiers):  # {{{
    """Extract a Metadata object from a detail-page node.

    The incoming ``title``, ``authors`` and ``identifiers`` values are not
    read; everything is taken from ``entry``. The OZON id comes from the
    href of a ``link rel="canonical"`` element, looked up from the
    document root (the XPath starts with ``//``).

    :param log: logger object; receives a debug message when no rating
        is found.
    :param entry: node exposing ``xpath()``.
    :return: the populated Metadata instance.
    """
    page_title = unicode(entry.xpath(
        u'normalize-space(.//h1[@itemprop="name"][1]/text())'))
    author_text = unicode(entry.xpath(
        u'normalize-space(.//a[contains(@href, "person")][1]/text())'))

    # Comma-separated author text -> list of normalized names.
    author_names = []
    for raw_name in author_text.split(u','):
        author_names.append(_normalizeAuthorNameWithInitials(raw_name.strip()))
    mi = Metadata(page_title, author_names)

    # The id is the href text between "id/" and the following "/".
    ozon_id = entry.xpath(
        u'substring-before(substring-after(normalize-space('
        u'//link[@rel="canonical"][contains(@href, "/context/detail/id/")][1]/@href), "id/"), "/")')
    if ozon_id:
        mi.identifiers = {'ozon': ozon_id}

    mi.ozon_cover_url = None
    cover_src = entry.xpath(u'normalize-space(.//img[1]/@src)')
    if cover_src:
        mi.ozon_cover_url = _translateToBigCoverUrl(cover_src)

    mi.rating = self.get_rating(entry)
    if not mi.rating:
        log.debug('No rating (from_detail) found. ozon_id:%s' % ozon_id)
    return mi
def to_metadata(self, log, entry):  # {{{
    """Build a Metadata object from one search-result node.

    Title, author text, cover URL and publication year are read from
    ``entry`` with XPath queries; the OZON id is taken from the node's
    ``data-href`` attribute and the rating from ``self.get_rating``.

    :param log: logger object; receives the cover debug message and is
        passed on to ``toPubdate`` and ``self.get_rating``.
    :param entry: node exposing ``xpath()`` and ``get()``.
    :return: the populated Metadata instance.
    """
    title = unicode(entry.xpath(
        u'normalize-space(.//div[@itemprop="name"][1]/text())'))
    author = unicode(entry.xpath(
        u'normalize-space(.//div[contains(@class, "mPerson")])'))

    # The author text is split on commas and every piece is normalized
    # on its own.
    norm_authors = [_normalizeAuthorNameWithInitials(name.strip())
                    for name in author.split(u',')]
    mi = Metadata(title, norm_authors)

    # The id is the second-to-last '/'-separated segment of data-href.
    # A missing attribute or a value with no '/' yields no id instead of
    # raising AttributeError / IndexError.
    href_parts = (entry.get('data-href') or '').split('/')
    ozon_id = href_parts[-2] if len(href_parts) >= 2 else ''
    if ozon_id:
        mi.identifiers = {'ozon': ozon_id}

    mi.ozon_cover_url = None
    cover = entry.xpath(u'normalize-space(.//img[1]/@src)')
    log.debug(u'cover: -----> %s' % cover)
    if cover:
        mi.ozon_cover_url = _translateToBigCoverUrl(cover)

    # The publication year is the first run of four digits in the first
    # text node of the property block.
    pub_year_block = entry.xpath(
        u'.//div[@class="bOneTileProperty"]/text()')
    if pub_year_block:
        pub_year = re.search(r'\d{4}', pub_year_block[0])
        if pub_year:
            mi.pubdate = toPubdate(log, pub_year.group())

    mi.rating = self.get_rating(log, entry)
    return mi
def to_metadata(self, log, entry):  # {{{
    """Convert one search-result node into a Metadata object.

    The OZON id comes from the node's ``data-href`` attribute; title,
    author text, cover URL and publication year come from XPath queries
    on ``entry``; the rating comes from ``self.get_rating``.

    :param log: logger object; receives the cover debug message and is
        passed on to ``toPubdate`` and ``self.get_rating``.
    :param entry: node exposing ``xpath()`` and ``get()``.
    :return: the populated Metadata instance.
    """
    title = unicode(entry.xpath(
        u'normalize-space(.//div[@itemprop="name"][1]/text())'))
    author = unicode(entry.xpath(
        u'normalize-space(.//div[contains(@class, "mPerson")])'))

    # Split the author text on commas and normalize each piece.
    norm_authors = [_normalizeAuthorNameWithInitials(part.strip())
                    for part in author.split(u',')]
    mi = Metadata(title, norm_authors)

    # Take the second-to-last '/'-separated segment of data-href as the
    # id. When the attribute is absent or has no '/', no id is recorded
    # rather than raising AttributeError / IndexError.
    data_href = entry.get('data-href') or ''
    segments = data_href.split('/')
    ozon_id = segments[-2] if len(segments) >= 2 else ''
    if ozon_id:
        mi.identifiers = {'ozon': ozon_id}

    mi.ozon_cover_url = None
    cover = entry.xpath(u'normalize-space(.//img[1]/@src)')
    log.debug(u'cover: -----> %s' % cover)
    if cover:
        mi.ozon_cover_url = _translateToBigCoverUrl(cover)

    # First run of four digits in the first text node of the property
    # block is used as the publication year.
    pub_year_block = entry.xpath(u'.//div[@class="bOneTileProperty"]/text()')
    if pub_year_block:
        pub_year = re.search(r'\d{4}', pub_year_block[0])
        if pub_year:
            mi.pubdate = toPubdate(log, pub_year.group())

    mi.rating = self.get_rating(log, entry)
    return mi