def obj_thumbnail(self):
    """Thumbnail taken from the itemprop="thumbnail" link of the video block."""
    href = CleanText(
        '//div[@itemprop="video"]/span[@itemprop="thumbnail"]/link/@href'
    )(self.el)
    image = BaseImage(href)
    image.url = image.id
    return image
def obj_picture(self):
    """Return a BaseImage for the element's 'image' entry, or None when absent."""
    if 'image' not in self.el:
        return None
    picture = BaseImage()
    picture.url = self.el['image']
    return picture
def obj_thumbnail(self):
    """Thumbnail built from the og:image meta tag; None if the tag is empty."""
    src = NormalizeThumbnail(
        CleanText('/html/head/meta[@property="og:image"]/@content'))(self)
    if not src:
        return None
    image = BaseImage(src)
    image.url = image.id
    return image
def parse_movie(self, movie):
    """Build a BaseVideo (trailer) from a movie dict returned by the API.

    Expects ``movie`` to carry at least 'code', 'trailer', 'release' and
    'statistics' keys — TODO confirm against the caller's API schema.
    """
    # The id combines the movie code with a '#movie' suffix.
    video = BaseVideo(u'%s#%s' % (movie['code'], 'movie'))
    video.title = unicode(movie['trailer']['name'])
    video._video_code = unicode(movie['trailer']['code'])
    video.ext = u'mp4'
    if 'poster' in movie:
        video.thumbnail = BaseImage(movie['poster']['href'])
        video.thumbnail.url = unicode(movie['poster']['href'])
    # Release date is 'YYYY-MM-DD'; incomplete dates fall back to 1901-01-01.
    tdate = movie['release']['releaseDate'].split('-')
    day = 1
    month = 1
    year = 1901
    if len(tdate) > 2:
        year = int(tdate[0])
        month = int(tdate[1])
        day = int(tdate[2])
    video.date = date(year, month, day)
    # Prefer the user rating; the press rating is on a /2.5 scale so it is
    # doubled to match rating_max = 5 — presumably; verify against the site.
    if 'userRating' in movie['statistics']:
        video.rating = movie['statistics']['userRating']
    elif 'pressRating' in movie['statistics']:
        video.rating = movie['statistics']['pressRating'] * 2
    video.rating_max = 5
    # Strip the <p> wrapper the API puts around synopsis text.
    if 'synopsis' in movie:
        video.description = unicode(movie['synopsis'].replace(
            '<p>', '').replace('</p>', ''))
    elif 'synopsisShort' in movie:
        video.description = unicode(movie['synopsisShort'].replace(
            '<p>', '').replace('</p>', ''))
    if 'castingShort' in movie:
        if 'directors' in movie['castingShort']:
            video.author = unicode(movie['castingShort']['directors'])
    if 'runtime' in movie:
        # Runtime is given in seconds.
        video.duration = timedelta(seconds=int(movie['runtime']))
    return video
def set_video_metadata(self, video):
    """Fill *video* metadata by emulating the player's XmlHttpRequest.

    The player html code with all the required information is loaded
    after the main page using javascript and a special XmlHttpRequest;
    we emulate this behaviour here.

    Fix: the bare ``except:`` has been narrowed to ``except Exception:``
    so SystemExit/KeyboardInterrupt are no longer swallowed; the
    best-effort behaviour (return normally on any parse failure) is kept.
    """
    from_request = self.group_dict['from']
    query = urllib.urlencode({
        'from_request': from_request,
        'request': '/video/%s?get_video=1' % video.id
    })
    request = mechanize.Request(KidsVideoPage.CONTROLLER_PAGE % query)
    # This header is mandatory to have the correct answer from dailymotion
    request.add_header('X-Requested-With', 'XMLHttpRequest')
    player_html = self.browser.readurl(request)
    try:
        m = re.search('<param name="flashvars" value="(?P<flashvars>.*?)"',
                      player_html)
        flashvars = urlparse.parse_qs(m.group('flashvars'))
        info = json.loads(flashvars['sequence'][0])

        # The video parameters seem to be always located at the same place
        # in the structure: ['sequence'][0]['layerList'][0]['sequenceList']
        # [0]['layerList'][0]['param']['extraParams'])
        #
        # but to be more tolerant to future changes in the structure, we
        # prefer to look for the parameters everywhere in the structure
        def find_video_params(data):
            # Depth-first search for a dict holding 'param'/'extraParams'.
            if isinstance(data, dict):
                if 'param' in data and 'extraParams' in data['param']:
                    return data['param']['extraParams']
                data = data.values()
            if not isinstance(data, list):
                return None
            for item in data:
                ret = find_video_params(item)
                if ret:
                    return ret
            return None

        params = find_video_params(info['sequence'])
        video.title = unicode(params['videoTitle'])
        video.author = unicode(params['videoOwnerLogin'])
        video.description = unicode(params['videoDescription'])
        video.thumbnail = BaseImage(params['videoPreviewURL'])
        video.thumbnail.url = unicode(params['videoPreviewURL'])
        video.duration = datetime.timedelta(
            seconds=params['mediaDuration'])
    except Exception:
        # If anything goes wrong, we prefer to return normally, this will
        # allow video download to work even if we don't have the metadata
        pass
def create_video_from_json(self, _video):
    """Build a BaseVideo from one JSON search-result entry.

    Fixes over the previous version:
    - the parsed publication date was computed but never stored; it is
      now assigned to ``video.date``;
    - when no usable thumbnail data was present, the code assigned
      ``video.thumbnail.url`` on a thumbnail object that was never
      created; a ``BaseImage`` is now always created first.
    """
    video = BaseVideo()
    video.id = u'%s' % _video['id']
    # The backend name is the part after the last '@' of the id.
    video.backend = u'%s' % _video['id'].split('@')[-1]
    if 'url' in _video:
        video.url = u'%s' % _video['url']
    video.thumbnail = BaseImage()
    if 'thumbnail' in _video and _video['thumbnail'] and 'url' in _video['thumbnail']:
        video.thumbnail.url = u'%s' % _video['thumbnail']['url']
    else:
        video.thumbnail.url = u''
    video.title = u'%s' % _video['title']
    if _video['date']:
        _date = re.search(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*',
                          _video['date'])
        if _date is not None:
            try:
                video.date = datetime.strptime(_date.group(1),
                                               '%Y-%m-%d %H:%M:%S')
            except TypeError:
                # Workaround for the known strptime thread-safety bug that
                # raises TypeError on first concurrent use.
                video.date = datetime(*(time.strptime(
                    _date.group(1), '%Y-%m-%d %H:%M:%S')[0:6]))
    video.description = u'%s' % _video['description']
    video.author = u'%s' % _video['author']
    if _video['duration']:
        # Duration is 'HH:MM:SS'.
        _duration = _video['duration'].split(':')
        video.duration = timedelta(hours=int(_duration[0]),
                                   minutes=int(_duration[1]),
                                   seconds=int(_duration[2]))
    return video
def get_video(self, _id):
    """Resolve a video id through libquvi and fill in its metadata.

    Raises UserError when libquvi is missing or fails, and
    NotImplementedError when no direct URL can be extracted.
    """
    video = QuviVideo(_id)
    quvi = LibQuvi()
    if not quvi.load():
        raise UserError('Make sure libquvi 0.4 is installed')
    try:
        info = quvi.get_info(video.page_url)
    except QuviError as err:
        raise UserError(err.message)

    video.url = to_unicode(info.get('url'))
    if not video.url:
        raise NotImplementedError()

    video.ext = to_unicode(info.get('suffix'))
    video.title = to_unicode(info.get('title'))
    video.page = to_unicode(info.get('page'))

    # libquvi reports the duration in milliseconds; 0 means unknown.
    length = int(info.get('duration', 0))
    if length:
        video.duration = datetime.timedelta(milliseconds=length)

    thumb = info.get('thumbnail')
    if thumb:
        video.thumbnail = BaseImage(thumb)
        video.thumbnail.url = video.thumbnail.id
    return video
def create_audio(self, song):
    """Build a GroovesharkAudio object from a song dict.

    Fix: the fallback to ``song['Name']`` used a bare ``except:`` which
    also hid unrelated errors (and could swallow KeyboardInterrupt).
    It is now limited to a missing/None 'SongName' entry.
    """
    audio = GroovesharkAudio(song['SongID'])
    try:
        audio.title = u'%s' % song['SongName'].encode('ascii', 'replace')
    except (KeyError, AttributeError):
        # Some payloads use 'Name' instead of 'SongName' (or carry None).
        audio.title = u'%s' % song['Name'].encode('ascii', 'replace')
    audio.author = u'%s' % song['ArtistName'].encode('ascii', 'replace')
    audio.description = u'%s - %s' % (
        audio.author, song['AlbumName'].encode('ascii', 'replace'))
    if song['CoverArtFilename']:
        audio.thumbnail = BaseImage(
            u'http://images.gs-cdn.net/static/albums/40_' +
            song['CoverArtFilename'])
        audio.thumbnail.url = audio.thumbnail.id
    if song['EstimateDuration']:
        audio.duration = datetime.timedelta(
            seconds=int(float(song['EstimateDuration'])))
    try:
        if song.get('Year'):
            audio.date = datetime.date(year=int(song['Year']), month=1, day=1)
    except ValueError:
        # Non-numeric year in the data.
        audio.date = NotAvailable
    return audio
def fill_gallery(self, gallery):
    """Populate *gallery* fields from the gallery page document."""
    gallery.title = self.document.xpath("//h1[@id='gn']/text()")[0]
    try:
        gallery.original_title = self.document.xpath("//h1[@id='gj']/text()")[0]
    except IndexError:
        # Not every gallery carries an original (non-romanized) title.
        gallery.original_title = None
    description_div = self.document.xpath("//div[@id='gd71']")[0]
    description_html = self.parser.tostring(description_div)
    gallery.description = html2text(description_html)
    # The 'Images:' row of the details table gives the page count.
    cardinality_string = self.document.xpath("//div[@id='gdd']//tr[td[@class='gdt1']/text()='Images:']/td[@class='gdt2']/text()")[0]
    gallery.cardinality = int(re.match(r"\d+", cardinality_string).group(0))
    date_string = self.document.xpath("//div[@id='gdd']//tr[td[@class='gdt1']/text()='Posted:']/td[@class='gdt2']/text()")[0]
    gallery.date = datetime.strptime(date_string, "%Y-%m-%d %H:%M")
    rating_string = self.document.xpath("//td[@id='rating_label']/text()")[0]
    rating_match = re.search(r"\d+\.\d+", rating_string)
    # Unrated galleries have no numeric value in the label — presumably;
    # verify against a 'Not Yet Rated' page.
    gallery.rating = None if rating_match is None else float(rating_match.group(0))
    gallery.rating_max = 5
    try:
        thumbnail_url = self.document.xpath("//div[@class='gdtm']/a/img/attribute::src")[0]
    except IndexError:
        # Fallback: the thumbnail can be a CSS background image instead
        # of a plain <img>.
        thumbnail_style = self.document.xpath("//div[@class='gdtm']/div/attribute::style")[0]
        thumbnail_url = re.search(r"background:[^;]+url\((.+?)\)", thumbnail_style).group(1)
    gallery.thumbnail = BaseImage(thumbnail_url)
    gallery.thumbnail.url = gallery.thumbnail.id
def iter_videos(self, pattern=None):
    """Yield videos listed on the page, optionally filtered by *pattern*
    (case-insensitive substring match on the title)."""
    for block in self.document.getroot().cssselect("div[class=bloc-contenu-8]"):
        name = self.parser.select(block, 'h1', 1).text_content().replace(' ', ' ')
        if pattern and pattern.upper() not in name.upper():
            continue
        match = re.match(r'/contenu.php\?id=(.*)', block.find('a').attrib['href'])
        video_id = match.group(1) if match else ''
        video = ArretSurImagesVideo(video_id)
        video.title = unicode(name)
        video.rating = None
        video.rating_max = None
        img = self.parser.select(block, 'img', 1)
        thumb_url = u'http://www.arretsurimages.net' + img.attrib['src']
        video.thumbnail = BaseImage(thumb_url)
        video.thumbnail.url = video.thumbnail.id
        yield video
def obj_thumbnail(self):
    """Thumbnail from the entry preview image; None when no src is found."""
    src = NormalizeThumbnail(
        CleanText('a/span[@class="item-entry-preview"]/img/@src'))(self)
    if not src:
        return None
    image = BaseImage(src)
    image.url = image.id
    return image
def create_video(metadata):
    """Build an RmllVideo from a metadata dict ('oid', 'title', ...)."""
    video = RmllVideo(metadata['oid'])
    video.url = NotLoaded
    video.title = unicode(metadata['title'])
    video.date = DateTime(Dict('creation'), default=NotLoaded)(metadata)
    video.duration = RmllDuration(
        Dict('duration', default=''), default=NotLoaded)(metadata)
    thumb_url = NormalizeThumbnail(Dict('thumb'))(metadata)
    video.thumbnail = BaseImage(thumb_url)
    video.thumbnail.url = video.thumbnail.id
    return video
def iter_videos(self):
    """Yield a DailymotionVideo for each item of the listing page."""
    for div in self.parser.select(self.document.getroot(),
                                  'div.sd_video_listitem'):
        smalldiv = self.parser.select(div, 'div.sd_video_preview', 1)
        _id = smalldiv.attrib.get('data-id', None)
        if _id is None:
            self.browser.logger.warning('Unable to find the ID of a video')
            continue
        video = DailymotionVideo(_id)
        video.title = unicode(
            self.parser.select(div, 'div a img', 1).attrib['title']).strip()
        video.author = unicode(
            self.parser.select(div, 'a.link-on-hvr', 1).text).strip()
        video.description = NotAvailable
        try:
            parts = self.parser.select(div, 'div.badge-duration', 1).text.split(':')
        except BrokenPageError:
            # it's probably a live, np.
            video.duration = NotAvailable
        else:
            # Duration badge is "SS", "MM:SS" or "HH:MM:SS".
            if len(parts) == 1:
                seconds = parts[0]
                hours = minutes = 0
            elif len(parts) == 2:
                minutes, seconds = parts
                hours = 0
            elif len(parts) == 3:
                hours, minutes, seconds = parts
            else:
                raise BrokenPageError(
                    'Unable to parse duration %r' % self.parser.select(div, 'div.duration', 1).text)
            video.duration = datetime.timedelta(hours=int(hours),
                                                minutes=int(minutes),
                                                seconds=int(seconds))
        url = unicode(
            self.parser.select(div, 'img.preview', 1).attrib['data-src'])
        # remove the useless anti-caching query string
        url = re.sub('\?\d+', '', url)
        video.thumbnail = BaseImage(url)
        video.thumbnail.url = video.thumbnail.id
        video.set_empty_fields(NotAvailable, ('url', ))
        yield video
def _entry2video(self, entry):
    """
    Parse an entry returned by gdata and return a Video object.
    """
    # The video id is the last path component of the entry's atom id.
    video_id = to_unicode(entry.id.text.split('/')[-1].strip())
    video = YoutubeVideo(video_id)
    video.title = to_unicode(entry.media.title.text.strip())
    video.duration = datetime.timedelta(
        seconds=int(entry.media.duration.seconds.strip()))
    video.thumbnail = BaseImage(entry.media.thumbnail[0].url.strip())
    video.thumbnail.url = to_unicode(video.thumbnail.id)
    # The media credit, when present, takes precedence over the feed author.
    if entry.author[0].name.text:
        video.author = to_unicode(entry.author[0].name.text.strip())
    if entry.media.name:
        video.author = to_unicode(entry.media.name.text.strip())
    return video
def create_album(self, _album):
    """Build an Album from an API album dict.

    Fix: the bare ``except:`` around the title lookup is narrowed to
    ``except KeyError`` — only a missing 'AlbumName' key should trigger
    the fallback to 'Name'.
    """
    album = Album(_album['AlbumID'])
    try:
        album.title = u'%s' % _album['AlbumName']
    except KeyError:
        album.title = u'%s' % _album['Name']
    album.author = u'%s' % _album['ArtistName']
    if _album['Year']:
        album.year = int(_album['Year'])
    if _album['CoverArtFilename']:
        album.thumbnail = BaseImage(
            u'http://images.gs-cdn.net/static/albums/80_' +
            _album['CoverArtFilename'])
        album.thumbnail.url = album.thumbnail.id
    return album
def set_video_metadata(self, video):
    """Fill *video* fields from the OpenGraph/meta tags of the page head."""
    head = self.parser.select(self.document.getroot(), 'head', 1)
    video.title = unicode(
        self.parser.select(head, 'meta[property="og:title"]',
                           1).get("content")).strip()
    video.author = unicode(
        self.parser.select(head, 'meta[name="author"]',
                           1).get("content")).strip()
    url = unicode(
        self.parser.select(head, 'meta[property="og:image"]',
                           1).get("content")).strip()
    # remove the useless anti-caching query string
    url = re.sub('\?\d+', '', url)
    video.thumbnail = BaseImage(url)
    video.thumbnail.url = video.thumbnail.id
    try:
        parts = self.parser.select(head, 'meta[property="video:duration"]',
                                   1).get("content").strip().split(':')
    except BrokenPageError:
        # it's probably a live, np.
        video.duration = NotAvailable
    else:
        # Duration is "SS", "MM:SS" or "HH:MM:SS".
        if len(parts) == 1:
            seconds = parts[0]
            hours = minutes = 0
        elif len(parts) == 2:
            minutes, seconds = parts
            hours = 0
        elif len(parts) == 3:
            hours, minutes, seconds = parts
        else:
            raise BrokenPageError('Unable to parse duration %r' % parts)
        video.duration = datetime.timedelta(hours=int(hours),
                                            minutes=int(minutes),
                                            seconds=int(seconds))
    try:
        video.description = html2text(
            self.parser.select(head, 'meta[property="og:description"]',
                               1).get("content")).strip() or unicode()
    except BrokenPageError:
        video.description = u''
def get_video_from_json(self, data):
    """Build a GDCVaultVideo from one JSON/solr record, or None.

    Returns None when the record carries no 'vault_media_id'.
    """
    # session_id is unique per talk
    # vault_media_id is unique per page
    # (but can refer to 2 video files for dual screen)
    # solr_id is "${vault_media_id}.${conference_id}.${session_id}.$vault_media_type_id{}"
    # XXX: do we filter them or let people know about them?
    #if 'anchor' in data:
    #    if data['anchor']['href'] == '#':
    #        # file will not be accessible (not free and not logged in)
    #        return None
    if 'vault_media_id' not in data:
        return None
    media_id = int(data['vault_media_id'])
    video = GDCVaultVideo(media_id)
    # 1013679 has \n in title...
    video.title = unicode(data.get('session_name', '').replace('\n', ''))
    # TODO: strip out <p>, <br> and other html...
    # XXX: 1013422 has all 3 and !=
    # Prefer the long 'overview', then 'spell', then 'description'.
    if 'overview' in data:
        video.description = unicode(data['overview'])
    elif 'spell' in data:
        video.description = unicode(data['spell'])
    else:
        video.description = unicode(data.get('description', ''))
    if 'image' in data:
        video.thumbnail = BaseImage(data['image'])
        video.thumbnail.url = video.thumbnail.id
    if 'speakers_name' in data:
        video.author = unicode(", ".join(data['speakers_name']))
    if 'start_date' in data:
        video.date = parse_dt(data['start_date'])
    if 'score' in data:
        video.rating = data['score']
    video.set_empty_fields(NotAvailable)
    return video
def parse_video(self, el, video=None):
    """Parse a video XML element and return a CanalplusVideo, or None
    when the service reports the video as not found (ID == -1)."""
    _id = el.find('ID').text
    if _id == '-1':
        # means the video is not found
        return None
    if not video:
        video = CanalplusVideo(_id)
    infos = el.find('INFOS')
    video.title = u''
    # Join the non-empty TITRAGE parts with an em-dash separator.
    for part in infos.find('TITRAGE'):
        if len(part.text.strip()) == 0:
            continue
        if len(video.title) > 0:
            video.title += u' — '
        video.title += part.text.strip()
    video.description = unicode(infos.find('DESCRIPTION').text)
    media = el.find('MEDIA')
    url = media.find('IMAGES').find('PETIT').text
    if url:
        video.thumbnail = BaseImage(url)
        video.thumbnail.url = video.thumbnail.id
    else:
        video.thumbnail = NotAvailable
    # Only the HLS stream is picked up here; other formats are skipped.
    for format in media.find('VIDEOS'):
        if format.text is None:
            continue
        if format.tag == 'HLS':
            video.ext = u'm3u8'
            video.url = unicode(format.text)
            break
    # Publication date/time are 'DD/MM/YYYY' and 'HH:MM:SS'.
    day, month, year = map(
        int, infos.find('PUBLICATION').find('DATE').text.split('/'))
    hour, minute, second = map(
        int, infos.find('PUBLICATION').find('HEURE').text.split(':'))
    video.date = datetime(year, month, day, hour, minute, second)
    return video
def iter_videos(self):
    """Yield a CappedVideo per search result on the page."""
    # When no results are found, the website returns random results
    sb = self.parser.select(self.document.getroot(),
                            'div.search form input.searchbox', 1)
    if sb.value == 'No Results Found':
        return
    # Extract metadata from the results page
    vidbackdrop_list = self.parser.select(self.document.getroot(),
                                          'div.vidBackdrop ')
    for vidbackdrop in vidbackdrop_list:
        url = self.parser.select(vidbackdrop, 'a', 1).attrib['href']
        # Drop the leading './' (or similar 2-char prefix) to get the id.
        _id = url[2:]
        video = CappedVideo(_id)
        video.set_empty_fields(NotAvailable, ('url',))
        video.title = to_unicode(self.parser.select(vidbackdrop,
                                                    'div.vidTitle a', 1).text)
        video.author = to_unicode(self.parser.select(vidbackdrop,
                                                     'div.vidAuthor a', 1).text)
        thumbnail_url = 'http://cdn.capped.tv/pre/%s.png' % _id
        video.thumbnail = BaseImage(thumbnail_url)
        video.thumbnail.url = to_unicode(video.thumbnail.id)
        # Parse the duration out of the vidInfo block.
        duration_tmp = self.parser.select(vidbackdrop, 'div.vidInfo', 1)
        # skip the fixed-width label prefix (tabs and spaces)
        duration_tmp2 = duration_tmp.text[7:]
        # keep only the time token, dropping the rest of the text
        duration_tmp3 = duration_tmp2.split(' ')[0]
        # duration is "SS", "MM:SS" or "HH:MM:SS"
        parts = duration_tmp3.split(':')
        if len(parts) == 1:
            hours = minutes = 0
            seconds = parts[0]
        elif len(parts) == 2:
            hours = 0
            minutes, seconds = parts
        elif len(parts) == 3:
            hours, minutes, seconds = parts
        else:
            raise BrokenPageError('Unable to parse duration %r' % duration_tmp)
        video.duration = datetime.timedelta(hours=int(hours),
                                            minutes=int(minutes),
                                            seconds=int(seconds))
        yield video
def iter_videos(self):
    """Yield a JacquieEtMichelVideo per entry of the listing page.

    Fix: ``set_empty_fields`` was called with ``('url,')`` — a tuple
    containing the single string ``'url,'`` — so the ``url`` field was
    never marked NotAvailable. It now receives ``('url',)``.
    """
    for span in self.document.xpath('//ul[@id="list"]/li'):
        a = self.parser.select(span, 'a', 1)
        url = a.attrib['href']
        # Video id is embedded in '/showvideo/<id>/...'.
        _id = re.sub(r'/showvideo/(\d+)/.*', r'\1', url)
        video = JacquieEtMichelVideo(_id)
        url = span.find('.//img').attrib['src']
        video.thumbnail = BaseImage(url)
        video.thumbnail.url = video.thumbnail.id
        title_el = self.parser.select(span, 'h2', 1)
        video.title = to_unicode(title_el.text.strip())
        video.description = self.parser.tocleanstring(
            span.xpath('.//div[@class="desc"]')[0])
        video.set_empty_fields(NotAvailable, ('url',))
        yield video
def iter_videos(self):
    """Yield a GDCVaultVideo stub for each featured session on the page."""
    for a in self.parser.select(
            self.document.getroot(),
            'section.conference ul.media_items li.featured a.session_item'
    ):
        href = a.attrib.get('href', '')
        # Only '/play/<id>/...' links are real sessions.
        m = re.match('/play/(\d+)/.*', href)
        if not m:
            continue
        video = GDCVaultVideo(m.group(1))
        # get title
        try:
            video.title = unicode(
                self.parser.select(a, 'div.conference_info p strong',
                                   1).text)
        except IndexError:
            video.title = NotAvailable
        # get description
        try:
            video.description = unicode(
                self.parser.select(a, 'div.conference_info p', 1).text)
        except IndexError:
            video.description = NotAvailable
        # get thumbnail
        img = self.parser.select(a, 'div.featured_image img', 1)
        if img is not None:
            video.thumbnail = BaseImage(img.attrib['src'])
            video.thumbnail.url = video.thumbnail.id
        else:
            video.thumbnail = NotAvailable
        #m = re.match('id-(\d+)', a.attrib.get('class', ''))
        #if not m:
        #    continue
        # FIXME
        yield video
def iter_videos(self):
    """Yield a YoupornVideo per listing item on the page."""
    for li in self.document.getroot().xpath('//ul/li[@class="videoBox"]'):
        a = li.find('div').find('a')
        if a is None or a.find('img') is None:
            continue
        thumbnail_url = a.find('img').attrib['src']
        a = self.parser.select(li, './/a[@class="videoTitle"]', 1, 'xpath')
        url = a.attrib['href']
        # Numeric id is the first path component after '/watch/'.
        _id = url[len('/watch/'):]
        _id = _id[:_id.find('/')]
        video = YoupornVideo(int(_id))
        video.title = unicode(a.text.strip())
        video.thumbnail = BaseImage(thumbnail_url)
        video.thumbnail.url = video.thumbnail.id
        # Duration is "MM:SS" or "HH:MM:SS"; defaults to 0 when missing.
        hours = minutes = seconds = 0
        div = li.cssselect('div.duration')
        if len(div) > 0:
            pack = [int(s) for s in div[0].text.strip().split(':')]
            if len(pack) == 3:
                hours, minutes, seconds = pack
            elif len(pack) == 2:
                minutes, seconds = pack
        video.duration = datetime.timedelta(hours=hours,
                                            minutes=minutes,
                                            seconds=seconds)
        # Rating is shown as a percentage, e.g. "87%".
        div = li.cssselect('div.rating')
        if div:
            video.rating = int(div[0].text.strip('% '))
            video.rating_max = 100
        video.set_empty_fields(NotAvailable, ('url', 'author'))
        yield video
def iter_videos(self, cat, lang='fr'):
    """Return the list of live videos for the given category tab."""
    result = []
    articles = self.document.xpath(
        '//div[@id="video_box_tab_%s"]/article' % cat)
    for article in articles:
        video = ArteLiveVideo(article.attrib['about'])
        heading = self.parser.select(
            article, 'div/div[@class="info-article "]/div/h3/a',
            1, method='xpath')
        video.title = u'%s' % heading.text
        thumb_src = self.parser.select(
            article, 'div/div/a/figure/span/span',
            1, method='xpath').attrib['data-src']
        video.thumbnail = BaseImage(thumb_src)
        video.thumbnail.url = video.thumbnail.id
        video.set_empty_fields(NotAvailable, ('url', ))
        result.append(video)
    return result
def iter_videos(self):
    """Yield a TricTracTVVideo per search-result entry."""
    for item in self.parser.select(self.document.getroot(),
                                   'li#contentsearch'):
        heading = self.parser.select(item, '#titlesearch span', 1)
        link = self.parser.select(item, 'a', 1)
        url = link.attrib['href']
        match = re.match('/video-(.*)', url)
        if not match:
            self.logger.debug('url %s does not match' % url)
            continue
        video = TricTracTVVideo(match.group(1))
        video.title = unicode(heading.text)
        thumb_src = self.parser.select(item, 'img', 1).attrib['src']
        # Rating = number of lit star elements, out of 5.
        video.rating = len(self.parser.select(item, '.etoile_on'))
        video.rating_max = 5
        video.thumbnail = BaseImage('http://www.trictrac.tv/%s' % thumb_src)
        video.thumbnail.url = video.thumbnail.id
        yield video
def get_video(self, video):
    """Fill (or create) a NolifeTVVideo from the mobile content page."""
    if not video:
        video = NolifeTVVideo(self.group_dict['id'])
    els = self.document.getroot().xpath('//div[@data-role="content"]')
    if els and els[0] is not None:
        # Title is '<h3> - <h4>' when a subtitle is present.
        h3 = els[0].find('h3')
        if h3 is not None and h3.text:
            video.title = unicode(h3.text)
            h4 = els[0].find('h4')
            if h4 is not None and h4.text:
                video.title = video.title + u' - ' + h4.text
        thumb = els[0].find('p/img')
        if thumb is not None and thumb.get('src'):
            video.thumbnail = BaseImage(thumb.attrib['src'])
            video.thumbnail.url = video.thumbnail.id
        # Paragraph layout (by position): ps[0] date, ps[2] duration,
        # ps[4] description — TODO confirm against the live page.
        ps = els[0].findall('p')
        if len(ps) > 4:
            if ps[4].text:
                video.description = ps[4].text
            # u'∞' marks an entry with no date.
            if ps[0].text and ps[0].text != u'∞':
                video.date = datetime.strptime(ps[0].text, '%d/%m/%Y').date()
            # Duration text looks like '[MM:]SSs'.
            for text in ps[2].xpath('.//text()'):
                m = re.search(r'[^\d]*((\d+):)?(\d+)s?', text)
                if m:
                    if m.group(2):
                        minutes = int(m.group(2))
                    else:
                        minutes = 0
                    video.duration = timedelta(minutes=minutes,
                                               seconds=int(m.group(3)))
    return video
def iter_videos(self):
    """Yield an InaVideo per result of the boutique search page."""
    try:
        ul = self.parser.select(self.document.getroot(),
                                'div.container-videos ul', 1)
    except BrokenPageError:
        # It means there are no results.
        return
    for li in ul.findall('li'):
        url = li.find('a').find('img').attrib['src']
        # The video id is extracted from the thumbnail URL.
        id = re.sub(self.URL_REGEXP, r'\2', url)
        video = InaVideo(id)
        video.thumbnail = BaseImage(u'http://boutique.ina.fr%s' % url)
        video.thumbnail.url = video.thumbnail.id
        # The title is poorly encoded in the source, we have to
        # encode/decode it again
        video.title = unicode(self.parser.select(
            li, 'p.titre', 1).text).encode('raw_unicode_escape').decode('utf8')
        # Date format on the page is 'DD/MM/YYYY'.
        date = self.parser.select(li, 'p.date', 1).text
        day, month, year = [int(s) for s in date.split('/')]
        video.date = datetime.datetime(year, month, day)
        # Duration looks like '[Hh][MMmin]SSs'.
        duration = self.parser.select(li, 'p.duree', 1).text
        m = re.match(r'((\d+)h)?((\d+)min)?(\d+)s', duration)
        if m:
            video.duration = datetime.timedelta(hours=int(m.group(2) or 0),
                                                minutes=int(m.group(4) or 0),
                                                seconds=int(m.group(5)))
        else:
            raise BrokenPageError('Unable to match duration (%r)' % duration)
        yield video
def get_video(self, video):
    """Fill (or create) an InaVideo from the RSS/MRSS item of the page."""
    if not video:
        video = InaVideo(self.get_id())
    video.title = u'%s' % self.parser.select(
        self.document.getroot(), '//rss/channel/item/title', 1,
        method='xpath').text
    # Thumbnail and stream URL live in the Yahoo Media RSS namespace.
    _image = u'%s' % self.parser.select(
        self.document.getroot(),
        '//rss/channel/item/media:content/media:thumbnail', 1,
        method='xpath',
        namespaces={'media': 'http://search.yahoo.com/mrss/'}).attrib['url']
    video.thumbnail = BaseImage(_image)
    video.thumbnail.url = video.thumbnail.id
    video.url = u'%s' % self.parser.select(
        self.document.getroot(), '//rss/channel/item/media:content', 1,
        method='xpath',
        namespaces={'media': 'http://search.yahoo.com/mrss/'}).attrib['url']
    # pubDate is RFC822-like; the trailing timezone offset is dropped
    # before parsing.
    _date = self.parser.select(
        self.document.getroot(), '//rss/channel/item/pubDate', 1,
        method='xpath').text
    video.date = datetime.strptime(_date[:-6], '%a, %d %b %Y %H:%M:%S')
    video.description = u'%s' % self.parser.select(
        self.document.getroot(), '//rss/channel/item/description', 1,
        method='xpath').text
    video.set_empty_fields(NotAvailable)
    return video
def obj_thumbnail(self):
    """Thumbnail from the first image under the entry link."""
    source = self.xpath('./a/img')[0].attrib['src']
    image = BaseImage(source)
    image.url = image.id
    return image
def obj_thumbnail(self):
    """Thumbnail from the lazy-loaded image ('data-original' attribute)."""
    source = self.xpath(".//img")[0].attrib["data-original"]
    image = BaseImage(source)
    image.url = image.id
    return image
def obj_thumbnail(self):
    """Thumbnail from the lazy-loaded image ('data-original' attribute)."""
    source = self.xpath('.//img')[0].attrib['data-original']
    image = BaseImage(source)
    image.url = image.id
    return image
def obj_thumbnail(self):
    """Absolute thumbnail URL built from the relative 'image' key."""
    full_url = Format('http://pluzz.francetv.fr%s', Dict['image'])(self)
    image = BaseImage(full_url)
    image.url = image.id
    return image
def obj_thumbnail(self):
    """Thumbnail from the element's 'thumbnailUrl' entry."""
    source = Dict('thumbnailUrl')(self.el)
    image = BaseImage(source)
    image.url = image.id
    return image
def obj_thumbnail(self):
    """Thumbnail from the first <image>/<url> node of the document."""
    source = CleanText('//image[1]/url')(self)
    image = BaseImage(source)
    image.url = image.id
    return image
def obj_thumbnail(self):
    """Thumbnail from 'path_img_emission'; None when the key is missing."""
    if 'path_img_emission' not in self.el:
        return None
    image = BaseImage(Dict('path_img_emission')(self))
    image.url = image.id
    return image
def obj_thumbnail(self):
    """Thumbnail from the direct child image's 'src' attribute."""
    source = Attr('./img', 'src')(self)
    image = BaseImage(source)
    image.url = image.id
    return image
def obj_thumbnail(self):
    """Thumbnail from the result vignette's lazy 'data-src' attribute."""
    source = Attr('a/img[@class="resultat-vignette"]', 'data-src')(self)
    image = BaseImage(source)
    image.url = image.id
    return image
def obj_thumbnail(self):
    """Thumbnail taken from the itemprop="thumbnail" link of the video block."""
    href = CleanText('//div[@itemprop="video"]/span[@itemprop="thumbnail"]/link/@href')(self.el)
    image = BaseImage(href)
    image.url = image.id
    return image
def obj_thumbnail(self):
    """Thumbnail from the first image under the entry link."""
    source = self.xpath("./a/img")[0].attrib["src"]
    image = BaseImage(source)
    image.url = image.id
    return image
def obj_thumbnail(self):
    """Thumbnail built from the og:image meta tag; None if the tag is empty."""
    src = NormalizeThumbnail(CleanText('/html/head/meta[@property="og:image"]/@content'))(self)
    if not src:
        return None
    image = BaseImage(src)
    image.url = image.id
    return image
def obj_thumbnail(self):
    """Thumbnail from the vignette link's lazy 'data-src' attribute."""
    source = Attr('a[@class="vignette"]/img', 'data-src')(self)
    image = BaseImage(source)
    image.url = image.id
    return image