示例#1
0
 def obj_thumbnail(self):
     """Return the video thumbnail as a ``BaseImage``.

     The value extracted from the ``href`` of the ``link`` node nested in
     the ``itemprop="thumbnail"`` span is handed to ``BaseImage``; the
     resulting object's ``id`` is then copied into its ``url``.
     """
     href_filter = CleanText(
         '//div[@itemprop="video"]/span[@itemprop="thumbnail"]/link/@href')
     image = BaseImage(href_filter(self.el))
     image.url = image.id
     return image
示例#2
0
文件: pages.py 项目: guix77/weboob
 def obj_picture(self):
     """Return a ``BaseImage`` whose ``url`` is ``self.el['image']``.

     Returns None when looking up ``'image'`` raises ``KeyError``.
     """
     picture = BaseImage()
     try:
         picture.url = self.el['image']
     except KeyError:
         return None
     else:
         return picture
示例#3
0
 def obj_thumbnail(self):
     """Return the ``og:image`` thumbnail, or None when the filter yields
     a falsy value."""
     og_image = CleanText('/html/head/meta[@property="og:image"]/@content')
     address = NormalizeThumbnail(og_image)(self)
     if not address:
         return None
     image = BaseImage(address)
     image.url = image.id
     return image
示例#4
0
    def parse_movie(self, movie):
        """Build a ``BaseVideo`` for the trailer described by *movie*.

        *movie* is indexed like a dict. The ``code``, ``trailer``,
        ``release`` and ``statistics`` entries are read unconditionally;
        ``poster``, ``synopsis``, ``synopsisShort``, ``castingShort`` and
        ``runtime`` are only used when present.
        """
        video = BaseVideo(u'%s#%s' % (movie['code'], 'movie'))
        trailer = movie['trailer']
        video.title = unicode(trailer['name'])
        video._video_code = unicode(trailer['code'])
        video.ext = u'mp4'

        if 'poster' in movie:
            poster_href = movie['poster']['href']
            video.thumbnail = BaseImage(poster_href)
            video.thumbnail.url = unicode(poster_href)

        # Fall back to 1901-01-01 unless the release date has at least
        # three dash-separated fields.
        date_fields = movie['release']['releaseDate'].split('-')
        if len(date_fields) > 2:
            year, month, day = [int(field) for field in date_fields[:3]]
        else:
            year, month, day = 1901, 1, 1
        video.date = date(year, month, day)

        stats = movie['statistics']
        if 'userRating' in stats:
            video.rating = stats['userRating']
        elif 'pressRating' in stats:
            video.rating = stats['pressRating'] * 2
        video.rating_max = 5

        # The full synopsis wins over the short one; <p> tags are removed.
        for key in ('synopsis', 'synopsisShort'):
            if key in movie:
                video.description = unicode(
                    movie[key].replace('<p>', '').replace('</p>', ''))
                break

        if 'castingShort' in movie and 'directors' in movie['castingShort']:
            video.author = unicode(movie['castingShort']['directors'])

        if 'runtime' in movie:
            video.duration = timedelta(seconds=int(movie['runtime']))

        return video
示例#5
0
    def set_video_metadata(self, video):
        """Fill *video* with the metadata found in the player's flashvars.

        The player HTML is fetched through ``KidsVideoPage.CONTROLLER_PAGE``
        and its ``flashvars`` parameter is decoded; title, author,
        description, thumbnail and duration are then copied onto *video*.

        Errors raised while parsing the player HTML are swallowed on
        purpose, so the video stays usable without metadata. Errors raised
        while fetching the player HTML are not caught.
        """

        # The player html code with all the required information is loaded
        # after the main page using javascript and a special XmlHttpRequest
        # we emulate this behaviour
        from_request = self.group_dict['from']

        query = urllib.urlencode({
            'from_request': from_request,
            'request': '/video/%s?get_video=1' % video.id
        })

        request = mechanize.Request(KidsVideoPage.CONTROLLER_PAGE % query)
        # This header is mandatory to have the correct answer from dailymotion
        request.add_header('X-Requested-With', 'XMLHttpRequest')
        player_html = self.browser.readurl(request)

        # The video parameters seem to be always located at the same place
        # in the structure: ['sequence'][0]['layerList'][0]['sequenceList']
        #   [0]['layerList'][0]['param']['extraParams'])
        #
        # but to be more tolerant to future changes in the structure, we
        # prefer to look for the parameters everywhere in the structure
        def find_video_params(data):
            if isinstance(data, dict):
                if 'param' in data and 'extraParams' in data['param']:
                    return data['param']['extraParams']
                # Materialise the values so the list check below also holds
                # when ``values()`` returns a view instead of a list.
                data = list(data.values())

            if not isinstance(data, list):
                return None

            for item in data:
                ret = find_video_params(item)
                if ret:
                    return ret

            return None

        try:
            m = re.search('<param name="flashvars" value="(?P<flashvars>.*?)"',
                          player_html)
            flashvars = urlparse.parse_qs(m.group('flashvars'))
            info = json.loads(flashvars['sequence'][0])

            params = find_video_params(info['sequence'])

            video.title = unicode(params['videoTitle'])
            video.author = unicode(params['videoOwnerLogin'])
            video.description = unicode(params['videoDescription'])
            video.thumbnail = BaseImage(params['videoPreviewURL'])
            video.thumbnail.url = unicode(params['videoPreviewURL'])
            video.duration = datetime.timedelta(
                seconds=params['mediaDuration'])

        except Exception:
            # If anything goes wrong, we prefer to return normally, this will
            # allow video download to work even if we don't have the metadata.
            # ``Exception`` (not a bare except) keeps KeyboardInterrupt and
            # SystemExit propagating.
            pass
示例#6
0
    def create_video_from_json(self, _video):
        """Build a ``BaseVideo`` from the dict *_video*.

        ``id``, ``title``, ``date``, ``description``, ``author`` and
        ``duration`` are read unconditionally; ``url`` and ``thumbnail``
        are optional. The thumbnail is always a ``BaseImage``; its ``url``
        is the empty string when *_video* carries no thumbnail url.
        ``duration`` is expected as ``H:M:S``.
        """
        video = BaseVideo()
        video.id = u'%s' % _video['id']
        video.backend = u'%s' % _video['id'].split('@')[-1]

        if 'url' in _video:
            video.url = u'%s' % _video['url']

        # Create the image in both cases: assigning ``.url`` on a thumbnail
        # that was never created would write to the field's default value.
        thumbnail = _video.get('thumbnail')
        video.thumbnail = BaseImage()
        if thumbnail and 'url' in thumbnail:
            video.thumbnail.url = u'%s' % thumbnail['url']
        else:
            video.thumbnail.url = u''
        video.title = u'%s' % _video['title']

        if _video['date']:
            _date = re.search(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}).*', _video['date'])
            # A date in an unexpected format is left unset instead of
            # crashing on ``None.group``.
            if _date:
                stamp = _date.group(1)
                try:
                    video.date = datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S')
                except TypeError:
                    # NOTE(review): presumably a workaround for environments
                    # where datetime.strptime raises TypeError - confirm.
                    video.date = datetime(*(time.strptime(stamp, '%Y-%m-%d %H:%M:%S')[0:6]))

        video.description = u'%s' % _video['description']
        video.author = u'%s' % _video['author']

        if _video['duration']:
            _duration = _video['duration'].split(':')
            video.duration = timedelta(hours=int(_duration[0]), minutes=int(_duration[1]), seconds=int(_duration[2]))

        return video
示例#7
0
    def get_video(self, _id):
        """Return a ``QuviVideo`` for *_id*, filled from libquvi's info.

        Raises ``UserError`` when libquvi cannot be loaded or reports a
        ``QuviError``, and ``NotImplementedError`` when the info contains
        no url.
        """
        video = QuviVideo(_id)

        quvi = LibQuvi()
        if not quvi.load():
            raise UserError('Make sure libquvi 0.4 is installed')

        try:
            info = quvi.get_info(video.page_url)
        except QuviError as qerror:
            raise UserError(qerror.message)

        video.url = to_unicode(info.get('url'))
        if not video.url:
            raise NotImplementedError()

        video.ext = to_unicode(info.get('suffix'))
        video.title = to_unicode(info.get('title'))
        video.page = to_unicode(info.get('page'))

        # The value is passed to timedelta as milliseconds.
        millis = int(info.get('duration', 0))
        if millis:
            video.duration = datetime.timedelta(milliseconds=millis)

        thumb = info.get('thumbnail')
        if thumb:
            video.thumbnail = BaseImage(thumb)
            video.thumbnail.url = video.thumbnail.id

        return video
示例#8
0
    def create_audio(self, song):
        """Build a ``GroovesharkAudio`` from the dict *song*.

        The title comes from ``SongName``, falling back to ``Name`` when
        reading ``SongName`` fails. ``CoverArtFilename``,
        ``EstimateDuration`` and ``Year`` are only used when truthy; a
        ``Year`` that cannot be turned into a date yields ``NotAvailable``.
        """
        audio = GroovesharkAudio(song['SongID'])
        try:
            audio.title = u'%s' % song['SongName'].encode('ascii', 'replace')
        except Exception:
            # Keep the best-effort fallback, but without a bare ``except``
            # so KeyboardInterrupt/SystemExit are not swallowed.
            audio.title = u'%s' % song['Name'].encode('ascii', 'replace')

        audio.author = u'%s' % song['ArtistName'].encode('ascii', 'replace')
        audio.description = u'%s - %s' % (
            audio.author, song['AlbumName'].encode('ascii', 'replace'))

        if song['CoverArtFilename']:
            audio.thumbnail = BaseImage(
                u'http://images.gs-cdn.net/static/albums/40_' +
                song['CoverArtFilename'])
            audio.thumbnail.url = audio.thumbnail.id

        if song['EstimateDuration']:
            audio.duration = datetime.timedelta(
                seconds=int(float(song['EstimateDuration'])))

        try:
            if 'Year' in song and song['Year']:
                audio.date = datetime.date(year=int(song['Year']),
                                           month=1,
                                           day=1)
        except ValueError:
            audio.date = NotAvailable

        return audio
示例#9
0
    def fill_gallery(self, gallery):
        """Populate *gallery* from the gallery page.

        Sets title, original title (None when absent), description,
        cardinality, date, rating (None when no decimal number is found),
        rating_max and thumbnail. The thumbnail url comes from the first
        ``gdtm`` image, or from the ``background`` style of the first
        ``gdtm`` div when there is no image.
        """
        def first(expr):
            # First node matched by *expr*; IndexError when nothing matches.
            return self.document.xpath(expr)[0]

        gallery.title = first("//h1[@id='gn']/text()")
        try:
            gallery.original_title = first("//h1[@id='gj']/text()")
        except IndexError:
            gallery.original_title = None

        description_node = first("//div[@id='gd71']")
        gallery.description = html2text(self.parser.tostring(description_node))

        images_cell = first("//div[@id='gdd']//tr[td[@class='gdt1']/text()='Images:']/td[@class='gdt2']/text()")
        gallery.cardinality = int(re.match(r"\d+", images_cell).group(0))

        posted_cell = first("//div[@id='gdd']//tr[td[@class='gdt1']/text()='Posted:']/td[@class='gdt2']/text()")
        gallery.date = datetime.strptime(posted_cell, "%Y-%m-%d %H:%M")

        rating_label = first("//td[@id='rating_label']/text()")
        found = re.search(r"\d+\.\d+", rating_label)
        if found is None:
            gallery.rating = None
        else:
            gallery.rating = float(found.group(0))
        gallery.rating_max = 5

        try:
            thumb_src = first("//div[@class='gdtm']/a/img/attribute::src")
        except IndexError:
            style_attr = first("//div[@class='gdtm']/div/attribute::style")
            thumb_src = re.search(r"background:[^;]+url\((.+?)\)", style_attr).group(1)

        gallery.thumbnail = BaseImage(thumb_src)
        gallery.thumbnail.url = gallery.thumbnail.id
示例#10
0
    def iter_videos(self, pattern=None):
        """Yield an ``ArretSurImagesVideo`` for each content block.

        When *pattern* is given, only blocks whose title contains it are
        kept; both sides are upper-cased before the comparison.
        """
        blocks = self.document.getroot().cssselect("div[class=bloc-contenu-8]")
        for block in blocks:
            heading = self.parser.select(block, 'h1', 1)
            title = heading.text_content().replace('  ', ' ')
            if pattern and pattern.upper() not in title.upper():
                continue

            found = re.match(r'/contenu.php\?id=(.*)',
                             block.find('a').attrib['href'])
            # An href that does not match yields an empty id.
            video_id = found.group(1) if found else ''

            video = ArretSurImagesVideo(video_id)
            video.title = unicode(title)
            video.rating = None
            video.rating_max = None

            img = self.parser.select(block, 'img', 1)
            video.thumbnail = BaseImage(
                u'http://www.arretsurimages.net' + img.attrib['src'])
            video.thumbnail.url = video.thumbnail.id

            yield video
示例#11
0
 def obj_thumbnail(self):
     """Return the preview image as a ``BaseImage``, or None when the
     normalized ``src`` is falsy."""
     preview_src = CleanText('a/span[@class="item-entry-preview"]/img/@src')
     address = NormalizeThumbnail(preview_src)(self)
     if not address:
         return None
     image = BaseImage(address)
     image.url = image.id
     return image
示例#12
0
def create_video(metadata):
    """Build an ``RmllVideo`` from the *metadata* mapping.

    ``oid`` and ``title`` are read directly; date, duration and thumbnail
    go through the ``DateTime``, ``RmllDuration`` and ``NormalizeThumbnail``
    filters. The video's ``url`` is left as ``NotLoaded``.
    """
    video = RmllVideo(metadata['oid'])
    video.title = unicode(metadata['title'])

    creation_filter = DateTime(Dict('creation'), default=NotLoaded)
    video.date = creation_filter(metadata)

    duration_filter = RmllDuration(Dict('duration', default=''),
                                   default=NotLoaded)
    video.duration = duration_filter(metadata)

    thumb_filter = NormalizeThumbnail(Dict('thumb'))
    video.thumbnail = BaseImage(thumb_filter(metadata))
    video.thumbnail.url = video.thumbnail.id

    video.url = NotLoaded
    return video
示例#13
0
    def iter_videos(self):
        """Yield a ``DailymotionVideo`` for each ``div.sd_video_listitem``.

        Items without a ``data-id`` attribute are skipped with a warning.
        The duration is ``NotAvailable`` when the duration badge is missing.

        Raises ``BrokenPageError`` when the badge text has more than three
        colon-separated fields.
        """
        for div in self.parser.select(self.document.getroot(),
                                      'div.sd_video_listitem'):
            smalldiv = self.parser.select(div, 'div.sd_video_preview', 1)
            _id = smalldiv.attrib.get('data-id', None)

            if _id is None:
                self.browser.logger.warning('Unable to find the ID of a video')
                continue

            video = DailymotionVideo(_id)
            video.title = unicode(
                self.parser.select(div, 'div a img',
                                   1).attrib['title']).strip()
            video.author = unicode(
                self.parser.select(div, 'a.link-on-hvr', 1).text).strip()
            video.description = NotAvailable
            try:
                raw_duration = self.parser.select(div, 'div.badge-duration',
                                                  1).text
                parts = raw_duration.split(':')
            except BrokenPageError:
                # it's probably a live, np.
                video.duration = NotAvailable
            else:
                if len(parts) == 1:
                    seconds = parts[0]
                    hours = minutes = 0
                elif len(parts) == 2:
                    minutes, seconds = parts
                    hours = 0
                elif len(parts) == 3:
                    hours, minutes, seconds = parts
                else:
                    # Report the text that was actually parsed; looking the
                    # element up again with another selector ('div.duration')
                    # would fail before this message could be built.
                    raise BrokenPageError(
                        'Unable to parse duration %r' % raw_duration)
                video.duration = datetime.timedelta(hours=int(hours),
                                                    minutes=int(minutes),
                                                    seconds=int(seconds))
            url = unicode(
                self.parser.select(div, 'img.preview', 1).attrib['data-src'])
            # remove the useless anti-caching
            url = re.sub(r'\?\d+', '', url)
            video.thumbnail = BaseImage(url)
            video.thumbnail.url = video.thumbnail.id

            video.set_empty_fields(NotAvailable, ('url', ))
            yield video
示例#14
0
    def _entry2video(self, entry):
        """
        Convert an entry returned by gdata into a ``YoutubeVideo``.

        The id is the last ``/``-separated component of ``entry.id.text``.
        The author is taken from the first entry author and is overridden
        by ``entry.media.name`` when that is set.
        """
        video_id = entry.id.text.split('/')[-1].strip()
        video = YoutubeVideo(to_unicode(video_id))

        media = entry.media
        video.title = to_unicode(media.title.text.strip())
        video.duration = datetime.timedelta(
            seconds=int(media.duration.seconds.strip()))

        video.thumbnail = BaseImage(media.thumbnail[0].url.strip())
        video.thumbnail.url = to_unicode(video.thumbnail.id)

        uploader = entry.author[0].name.text
        if uploader:
            video.author = to_unicode(uploader.strip())
        if media.name:
            video.author = to_unicode(media.name.text.strip())
        return video
示例#15
0
    def create_album(self, _album):
        """Build an ``Album`` from the dict *_album*.

        The title comes from ``AlbumName``, falling back to ``Name`` when
        reading ``AlbumName`` fails. ``Year`` and ``CoverArtFilename`` are
        only used when truthy.
        """
        album = Album(_album['AlbumID'])
        try:
            album.title = u'%s' % _album['AlbumName']
        except Exception:
            # Keep the best-effort fallback, but without a bare ``except``
            # so KeyboardInterrupt/SystemExit are not swallowed.
            album.title = u'%s' % _album['Name']

        album.author = u'%s' % _album['ArtistName']
        if _album['Year']:
            album.year = int(_album['Year'])

        if _album['CoverArtFilename']:
            album.thumbnail = BaseImage(
                u'http://images.gs-cdn.net/static/albums/80_' +
                _album['CoverArtFilename'])
            album.thumbnail.url = album.thumbnail.id
        return album
示例#16
0
    def set_video_metadata(self, video):
        """Populate *video* from the ``<meta>`` tags of the page head.

        Sets title, author, thumbnail, duration and description. The
        duration is ``NotAvailable`` and the description empty when
        selecting their tag raises ``BrokenPageError``.

        Raises ``BrokenPageError`` when the duration has more than three
        colon-separated fields.
        """
        head = self.parser.select(self.document.getroot(), 'head', 1)

        def meta_content(selector):
            # ``content`` attribute of the single tag matching *selector*.
            return self.parser.select(head, selector, 1).get("content")

        video.title = unicode(
            meta_content('meta[property="og:title"]')).strip()
        video.author = unicode(meta_content('meta[name="author"]')).strip()

        image_url = unicode(
            meta_content('meta[property="og:image"]')).strip()
        # remove the useless anti-caching
        image_url = re.sub('\?\d+', '', image_url)
        video.thumbnail = BaseImage(image_url)
        video.thumbnail.url = video.thumbnail.id

        try:
            fields = meta_content(
                'meta[property="video:duration"]').strip().split(':')
        except BrokenPageError:
            # it's probably a live, np.
            video.duration = NotAvailable
        else:
            if len(fields) > 3:
                raise BrokenPageError('Unable to parse duration %r' % fields)
            # Left-pad with zeros so one to three fields all unpack.
            hours, minutes, seconds = [0] * (3 - len(fields)) + fields
            video.duration = datetime.timedelta(hours=int(hours),
                                                minutes=int(minutes),
                                                seconds=int(seconds))

        try:
            description = html2text(
                meta_content('meta[property="og:description"]')).strip()
            video.description = description or unicode()
        except BrokenPageError:
            video.description = u''
示例#17
0
    def get_video_from_json(self, data):
        """Build a ``GDCVaultVideo`` from the JSON dict *data*.

        Returns None when *data* has no ``vault_media_id``. Fields still
        unset at the end are set to ``NotAvailable``.
        """
        # session_id is unique per talk
        # vault_media_id is unique per page
        # (but can refer to 2 video files for dual screen)
        # solr_id is "${vault_media_id}.${conference_id}.${session_id}.$vault_media_type_id{}"

        # XXX: entries whose data['anchor']['href'] is '#' are not accessible
        # (not free and not logged in); they are currently not filtered out.

        if 'vault_media_id' not in data:
            return None
        video = GDCVaultVideo(int(data['vault_media_id']))

        # 1013679 has \n in title...
        raw_title = data.get('session_name', '')
        video.title = unicode(raw_title.replace('\n', ''))

        # TODO: strip out <p>, <br> and other html...
        # XXX: 1013422 has all 3 and !=
        for key in ('overview', 'spell'):
            if key in data:
                video.description = unicode(data[key])
                break
        else:
            video.description = unicode(data.get('description', ''))

        if 'image' in data:
            video.thumbnail = BaseImage(data['image'])
            video.thumbnail.url = video.thumbnail.id

        if 'speakers_name' in data:
            speakers = ", ".join(data['speakers_name'])
            video.author = unicode(speakers)

        if 'start_date' in data:
            video.date = parse_dt(data['start_date'])

        if 'score' in data:
            video.rating = data['score']

        video.set_empty_fields(NotAvailable)
        return video
示例#18
0
    def parse_video(self, el, video=None):
        """Fill a ``CanalplusVideo`` from the XML element *el*.

        Returns None when the ``ID`` child is ``'-1'`` (video not found).
        A new video is created unless a truthy *video* is passed in.
        The title joins the non-empty ``TITRAGE`` children with ``' — '``,
        and the url is taken from the first non-empty ``HLS`` format.
        """
        _id = el.find('ID').text
        if _id == '-1':
            # means the video is not found
            return None

        if not video:
            video = CanalplusVideo(_id)

        infos = el.find('INFOS')
        # Children without text have ``text`` set to None; treat them like
        # empty strings instead of crashing on ``None.strip()``.
        title_parts = []
        for part in infos.find('TITRAGE'):
            text = (part.text or u'').strip()
            if text:
                title_parts.append(text)
        video.title = u' — '.join(title_parts)
        video.description = unicode(infos.find('DESCRIPTION').text)

        media = el.find('MEDIA')
        url = media.find('IMAGES').find('PETIT').text
        if url:
            video.thumbnail = BaseImage(url)
            video.thumbnail.url = video.thumbnail.id
        else:
            video.thumbnail = NotAvailable

        # ``fmt`` rather than ``format`` to avoid shadowing the builtin.
        for fmt in media.find('VIDEOS'):
            if fmt.text is None:
                continue

            if fmt.tag == 'HLS':
                video.ext = u'm3u8'
                video.url = unicode(fmt.text)
                break

        publication = infos.find('PUBLICATION')
        day, month, year = map(int, publication.find('DATE').text.split('/'))
        hour, minute, second = map(
            int, publication.find('HEURE').text.split(':'))
        video.date = datetime(year, month, day, hour, minute, second)

        return video
示例#19
0
    def iter_videos(self):
        """Yield a ``CappedVideo`` for each result block of the search page.

        Yields nothing when the search box reads ``'No Results Found'``.

        Raises ``BrokenPageError`` when the duration has more than three
        colon-separated fields.
        """
        # When no results are found, the website returns random results
        searchbox = self.parser.select(self.document.getroot(), 'div.search form input.searchbox', 1)
        if searchbox.value == 'No Results Found':
            return

        # Extract the metadata of every block of the results page
        for entry in self.parser.select(self.document.getroot(), 'div.vidBackdrop    '):
            href = self.parser.select(entry, 'a', 1).attrib['href']
            # The id is the href without its first two characters.
            _id = href[2:]

            video = CappedVideo(_id)
            video.set_empty_fields(NotAvailable, ('url',))

            video.title = to_unicode(self.parser.select(entry, 'div.vidTitle a', 1).text)
            video.author = to_unicode(self.parser.select(entry, 'div.vidAuthor a', 1).text)

            video.thumbnail = BaseImage('http://cdn.capped.tv/pre/%s.png' % _id)
            video.thumbnail.url = to_unicode(video.thumbnail.id)

            # The info text is sliced from index 7; its first space-separated
            # token is the duration.
            info_el = self.parser.select(entry, 'div.vidInfo', 1)
            fields = info_el.text[7:].split(' ')[0].split(':')
            if len(fields) > 3:
                raise BrokenPageError('Unable to parse duration %r' % info_el)
            # Left-pad with zeros so one to three fields all unpack.
            hours, minutes, seconds = [0] * (3 - len(fields)) + fields

            video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))

            yield video
示例#20
0
    def iter_videos(self):
        """Yield a ``JacquieEtMichelVideo`` for each item of the ``list`` ul.

        The id is the number found in the ``/showvideo/<id>/`` link. All
        fields left unset except ``url`` are set to ``NotAvailable``.
        """
        for span in self.document.xpath('//ul[@id="list"]/li'):
            a = self.parser.select(span, 'a', 1)
            url = a.attrib['href']
            _id = re.sub(r'/showvideo/(\d+)/.*', r'\1', url)

            video = JacquieEtMichelVideo(_id)

            url = span.find('.//img').attrib['src']
            video.thumbnail = BaseImage(url)
            video.thumbnail.url = video.thumbnail.id

            title_el = self.parser.select(span, 'h2', 1)
            video.title = to_unicode(title_el.text.strip())
            video.description = self.parser.tocleanstring(
                span.xpath('.//div[@class="desc"]')[0])
            # Pass a real one-element tuple: ('url,') is just the string
            # 'url,', which would make the exclusion a substring match.
            video.set_empty_fields(NotAvailable, ('url', ))

            yield video
示例#21
0
    def iter_videos(self):
        """Yield a ``GDCVaultVideo`` for each featured session link.

        Links whose ``href`` does not match ``/play/<digits>/`` are
        skipped. Title and description are ``NotAvailable`` when reading
        them raises ``IndexError``.
        """
        links = self.parser.select(
            self.document.getroot(),
            'section.conference ul.media_items li.featured a.session_item')
        for link in links:
            found = re.match('/play/(\d+)/.*', link.attrib.get('href', ''))
            if not found:
                continue
            video = GDCVaultVideo(found.group(1))

            # title
            try:
                heading = self.parser.select(
                    link, 'div.conference_info p strong', 1)
                video.title = unicode(heading.text)
            except IndexError:
                video.title = NotAvailable

            # description
            try:
                paragraph = self.parser.select(
                    link, 'div.conference_info p', 1)
                video.description = unicode(paragraph.text)
            except IndexError:
                video.description = NotAvailable

            # thumbnail
            preview = self.parser.select(link, 'div.featured_image img', 1)
            if preview is None:
                video.thumbnail = NotAvailable
            else:
                video.thumbnail = BaseImage(preview.attrib['src'])
                video.thumbnail.url = video.thumbnail.id

            # FIXME (kept from the original): a lookup of 'id-<digits>' in
            # the link's class attribute was left disabled here.
            yield video
示例#22
0
    def iter_videos(self):
        """Yield a ``YoupornVideo`` for each ``videoBox`` list item.

        Items without a preview link or image are skipped. The duration
        defaults to zero when there is no duration element or when it does
        not have two or three colon-separated fields. Fields left unset,
        except ``url`` and ``author``, are set to ``NotAvailable``.
        """
        for li in self.document.getroot().xpath('//ul/li[@class="videoBox"]'):
            preview_link = li.find('div').find('a')
            if preview_link is None or preview_link.find('img') is None:
                continue
            thumbnail_url = preview_link.find('img').attrib['src']

            title_link = self.parser.select(
                li, './/a[@class="videoTitle"]', 1, 'xpath')

            # The id is what sits between '/watch/' and the next '/'.
            tail = title_link.attrib['href'][len('/watch/'):]
            video_id = tail[:tail.find('/')]

            video = YoupornVideo(int(video_id))
            video.title = unicode(title_link.text.strip())
            video.thumbnail = BaseImage(thumbnail_url)
            video.thumbnail.url = video.thumbnail.id

            hours = minutes = seconds = 0
            duration_divs = li.cssselect('div.duration')
            if len(duration_divs) > 0:
                fields = [
                    int(s) for s in duration_divs[0].text.strip().split(':')]
                if len(fields) == 3:
                    hours, minutes, seconds = fields
                elif len(fields) == 2:
                    minutes, seconds = fields

            video.duration = datetime.timedelta(hours=hours,
                                                minutes=minutes,
                                                seconds=seconds)

            rating_divs = li.cssselect('div.rating')
            if rating_divs:
                video.rating = int(rating_divs[0].text.strip('% '))
                video.rating_max = 100

            video.set_empty_fields(NotAvailable, ('url', 'author'))

            yield video
示例#23
0
    def iter_videos(self, cat, lang='fr'):
        """Return a list of ``ArteLiveVideo`` for the articles of tab *cat*.

        *lang* is accepted but not used by this method.
        """
        def build(article):
            # One video per <article>: id, title and thumbnail only.
            _id = article.attrib['about']
            title = self.parser.select(
                article,
                'div/div[@class="info-article "]/div/h3/a',
                1,
                method='xpath').text
            thumbnail = self.parser.select(
                article,
                'div/div/a/figure/span/span',
                1,
                method='xpath').attrib['data-src']

            video = ArteLiveVideo(_id)
            video.title = u'%s' % title
            video.thumbnail = BaseImage(thumbnail)
            video.thumbnail.url = video.thumbnail.id
            video.set_empty_fields(NotAvailable, ('url', ))
            return video

        articles = self.document.xpath(
            '//div[@id="video_box_tab_%s"]/article' % cat)
        return [build(article) for article in articles]
示例#24
0
    def iter_videos(self):
        """Yield a ``TricTracTVVideo`` for each ``li#contentsearch`` item.

        Items whose link does not start with ``/video-`` are skipped with
        a debug message. The rating is the number of ``.etoile_on``
        elements, out of 5.
        """
        for item in self.parser.select(self.document.getroot(), 'li#contentsearch'):
            title_el = self.parser.select(item, '#titlesearch span', 1)

            href = self.parser.select(item, 'a', 1).attrib['href']
            found = re.match('/video-(.*)', href)
            if not found:
                self.logger.debug('url %s does not match' % href)
                continue

            video = TricTracTVVideo(found.group(1))
            video.title = unicode(title_el.text)

            img_src = self.parser.select(item, 'img', 1).attrib['src']
            video.rating = len(self.parser.select(item, '.etoile_on'))
            video.rating_max = 5

            video.thumbnail = BaseImage('http://www.trictrac.tv/%s' % img_src)
            video.thumbnail.url = video.thumbnail.id

            yield video
示例#25
0
    def get_video(self, video):
        """Fill *video* from the page's content block and return it.

        A ``NolifeTVVideo`` is created from ``self.group_dict['id']`` when
        *video* is falsy. Title, thumbnail, description, date and duration
        are set only when the matching elements are present. The video is
        returned even when the page has no content block.
        """
        if not video:
            video = NolifeTVVideo(self.group_dict['id'])

        els = self.document.getroot().xpath('//div[@data-role="content"]')
        if not els or els[0] is None:
            # Nothing to parse: hand the video back untouched instead of
            # falling off the end and returning None.
            return video
        content = els[0]

        h3 = content.find('h3')
        if h3 is not None and h3.text:
            video.title = unicode(h3.text)

        h4 = content.find('h4')
        if h4 is not None and h4.text:
            video.title = video.title + u' - ' + h4.text

        thumb = content.find('p/img')
        if thumb is not None and thumb.get('src'):
            video.thumbnail = BaseImage(thumb.attrib['src'])
            video.thumbnail.url = video.thumbnail.id

        ps = content.findall('p')
        if len(ps) > 4:
            if ps[4].text:
                video.description = ps[4].text
            if ps[0].text and ps[0].text != u'∞':
                video.date = datetime.strptime(ps[0].text,
                                               '%d/%m/%Y').date()

            # Every matching text node overwrites the duration, so the last
            # match wins.
            for text in ps[2].xpath('.//text()'):
                m = re.search(r'[^\d]*((\d+):)?(\d+)s?', text)
                if m:
                    minutes = int(m.group(2)) if m.group(2) else 0
                    video.duration = timedelta(minutes=minutes,
                                               seconds=int(m.group(3)))
        return video
示例#26
0
    def iter_videos(self):
        """Yield an ``InaVideo`` for each item of the videos list.

        Yields nothing when the list cannot be selected.

        Raises ``BrokenPageError`` when a duration does not match the
        ``<h>h<m>min<s>s`` pattern.
        """
        try:
            ul = self.parser.select(self.document.getroot(),
                                    'div.container-videos ul', 1)
        except BrokenPageError:
            # It means there are no results.
            return

        for item in ul.findall('li'):
            img_src = item.find('a').find('img').attrib['src']

            # The id is the second group of URL_REGEXP in the image path.
            video = InaVideo(re.sub(self.URL_REGEXP, r'\2', img_src))

            video.thumbnail = BaseImage(u'http://boutique.ina.fr%s' % img_src)
            video.thumbnail.url = video.thumbnail.id

            # The title is poorly encoded in the source, we have to
            # encode/decode it again
            raw_title = unicode(self.parser.select(item, 'p.titre', 1).text)
            video.title = raw_title.encode('raw_unicode_escape').decode('utf8')

            posted = self.parser.select(item, 'p.date', 1).text
            day, month, year = [int(s) for s in posted.split('/')]
            video.date = datetime.datetime(year, month, day)

            length = self.parser.select(item, 'p.duree', 1).text
            found = re.match(r'((\d+)h)?((\d+)min)?(\d+)s', length)
            if not found:
                raise BrokenPageError('Unable to match duration (%r)' %
                                      length)
            video.duration = datetime.timedelta(
                hours=int(found.group(2) or 0),
                minutes=int(found.group(4) or 0),
                seconds=int(found.group(5)))

            yield video
示例#27
0
    def get_video(self, video):
        """Fill *video* from the first RSS item of the document.

        An ``InaVideo`` is created from ``self.get_id()`` when *video* is
        falsy. Sets title, thumbnail, url, date and description; fields
        still unset afterwards are set to ``NotAvailable``.
        """
        media_ns = {'media': 'http://search.yahoo.com/mrss/'}

        def pick(path, **extra):
            # Single node matching the xpath *path* in the document root.
            return self.parser.select(self.document.getroot(),
                                      path,
                                      1,
                                      method='xpath',
                                      **extra)

        if not video:
            video = InaVideo(self.get_id())

        video.title = u'%s' % pick('//rss/channel/item/title').text

        _image = u'%s' % pick(
            '//rss/channel/item/media:content/media:thumbnail',
            namespaces=media_ns).attrib['url']
        video.thumbnail = BaseImage(_image)
        video.thumbnail.url = video.thumbnail.id

        video.url = u'%s' % pick('//rss/channel/item/media:content',
                                 namespaces=media_ns).attrib['url']

        # The last six characters of pubDate are dropped before parsing.
        published = pick('//rss/channel/item/pubDate').text
        video.date = datetime.strptime(published[:-6], '%a, %d %b %Y %H:%M:%S')

        video.description = u'%s' % pick('//rss/channel/item/description').text

        video.set_empty_fields(NotAvailable)
        return video
示例#28
0
 def obj_thumbnail(self):
     """Return a ``BaseImage`` built from the ``src`` of the first
     ``./a/img`` node; its ``id`` is copied into ``url``."""
     first_img = self.xpath('./a/img')[0]
     image = BaseImage(first_img.attrib['src'])
     image.url = image.id
     return image
示例#29
0
文件: index.py 项目: Boussadia/weboob
 def obj_thumbnail(self):
     """Return a ``BaseImage`` built from the ``data-original`` attribute
     of the first descendant ``img``; its ``id`` is copied into ``url``."""
     first_img = self.xpath(".//img")[0]
     image = BaseImage(first_img.attrib["data-original"])
     image.url = image.id
     return image
示例#30
0
 def obj_thumbnail(self):
     """Return a ``BaseImage`` built from the ``data-original`` attribute
     of the first descendant ``img``; its ``id`` is copied into ``url``."""
     first_img = self.xpath('.//img')[0]
     image = BaseImage(first_img.attrib['data-original'])
     image.url = image.id
     return image
示例#31
0
文件: pages.py 项目: Konubinix/weboob
 def obj_thumbnail(self):
     """Return a ``BaseImage`` for the ``image`` entry, prefixed with the
     pluzz.francetv.fr host; its ``id`` is copied into ``url``."""
     image = BaseImage(
         Format('http://pluzz.francetv.fr%s', Dict['image'])(self))
     image.url = image.id
     return image
示例#32
0
 def obj_thumbnail(self):
     """Return a ``BaseImage`` built from the ``thumbnailUrl`` entry of
     ``self.el``; its ``id`` is copied into ``url``."""
     address = Dict('thumbnailUrl')(self.el)
     image = BaseImage(address)
     image.url = image.id
     return image
示例#33
0
 def obj_thumbnail(self):
     """Return a ``BaseImage`` built from the ``src`` of the first
     ``./a/img`` node; its ``id`` is copied into ``url``."""
     first_img = self.xpath('./a/img')[0]
     image = BaseImage(first_img.attrib['src'])
     image.url = image.id
     return image
示例#34
0
 def obj_thumbnail(self):
     """Return a ``BaseImage`` built from the text of ``//image[1]/url``;
     its ``id`` is copied into ``url``."""
     address = CleanText('//image[1]/url')(self)
     image = BaseImage(address)
     image.url = image.id
     return image
示例#35
0
 def obj_thumbnail(self):
     """Return a ``BaseImage`` built from the ``path_img_emission`` entry,
     or None when ``self.el`` has no such key."""
     if 'path_img_emission' not in self.el:
         return None
     image = BaseImage(Dict('path_img_emission')(self))
     image.url = image.id
     return image
示例#36
0
 def obj_thumbnail(self):
     """Return a ``BaseImage`` for the ``image`` entry, prefixed with the
     pluzz.francetv.fr host; its ``id`` is copied into ``url``."""
     image = BaseImage(
         Format('http://pluzz.francetv.fr%s', Dict['image'])(self))
     image.url = image.id
     return image
示例#37
0
 def obj_thumbnail(self):
     """Return a ``BaseImage`` built from the ``src`` attribute of
     ``./img``; its ``id`` is copied into ``url``."""
     image = BaseImage(Attr('./img', 'src')(self))
     image.url = image.id
     return image
示例#38
0
文件: pages.py 项目: Konubinix/weboob
 def obj_thumbnail(self):
     """Return a ``BaseImage`` built from the ``data-src`` attribute of the
     ``resultat-vignette`` image; its ``id`` is copied into ``url``."""
     vignette_src = Attr('a/img[@class="resultat-vignette"]', 'data-src')
     image = BaseImage(vignette_src(self))
     image.url = image.id
     return image
示例#39
0
 def obj_thumbnail(self):
     """Return the video thumbnail as a ``BaseImage``.

     The ``href`` of the ``link`` node nested in the
     ``itemprop="thumbnail"`` span is handed to ``BaseImage``; the
     resulting object's ``id`` is then copied into its ``url``.
     """
     href_filter = CleanText('//div[@itemprop="video"]/span[@itemprop="thumbnail"]/link/@href')
     image = BaseImage(href_filter(self.el))
     image.url = image.id
     return image
示例#40
0
 def obj_thumbnail(self):
     """Return a ``BaseImage`` built from the ``src`` of the first
     ``./a/img`` node; its ``id`` is copied into ``url``."""
     first_img = self.xpath("./a/img")[0]
     image = BaseImage(first_img.attrib["src"])
     image.url = image.id
     return image
示例#41
0
 def obj_thumbnail(self):
     """Return the ``og:image`` thumbnail, or None when the filter yields
     a falsy value."""
     og_image = CleanText('/html/head/meta[@property="og:image"]/@content')
     address = NormalizeThumbnail(og_image)(self)
     if not address:
         return None
     image = BaseImage(address)
     image.url = image.id
     return image
示例#42
0
 def obj_thumbnail(self):
     """Return a ``BaseImage`` built from the ``data-src`` attribute of the
     ``resultat-vignette`` image; its ``id`` is copied into ``url``."""
     vignette_src = Attr('a/img[@class="resultat-vignette"]', 'data-src')
     image = BaseImage(vignette_src(self))
     image.url = image.id
     return image
示例#43
0
文件: pages.py 项目: Boussadia/weboob
 def obj_thumbnail(self):
     """Return a ``BaseImage`` built from the ``data-src`` attribute of the
     image inside the ``vignette`` link; its ``id`` is copied into ``url``."""
     vignette_src = Attr('a[@class="vignette"]/img', 'data-src')
     image = BaseImage(vignette_src(self))
     image.url = image.id
     return image