Python BrokenPageError.BrokenPageError примеры, weboob.deprecated.browser.BrokenPageError.BrokenPageError Python примеры использования

Пример #1

0

Показать файл

Файл: pages.py Проект: yang2lalang/weboob

    def get_list(self):
        """Build the accounts list from the JavaScript array embedded in the page.

        Rows of the ``_data = new Array(...)`` script statement are read using
        the ``COL_*`` class attributes as column indexes.

        :returns: list of Account objects
        :raises BrokenPageError: when the array cannot be found in the scripts
        """
        accounts = []

        txt = self.get_from_js('_data = new Array(', ');', is_list=True)

        if txt is None:
            raise BrokenPageError('Unable to find accounts list in scripts')

        # The JavaScript literal is single-quoted; swap the quotes so that the
        # json module accepts it.
        data = json.loads('[%s]' % txt.replace("'", '"'))

        for line in data:
            a = Account()
            a.id = line[self.COL_ID].replace(' ', '')
            # Only ids containing an underscore expose an account number
            # (the part before the first underscore).
            id_parts = a.id.split('_')
            a._acc_nb = id_parts[0] if len(id_parts) > 1 else None
            # The label cell holds an HTML fragment: parse it to get the text.
            fp = StringIO(unicode(line[self.COL_LABEL]).encode(self.browser.ENCODING))
            a.label = self.parser.tocleanstring(self.parser.parse(fp, self.browser.ENCODING).xpath('//div[@class="libelleCompteTDB"]')[0])
            # This account can be multiple life insurance accounts
            if a.label == 'ASSURANCE VIE-BON CAPI-SCPI-DIVERS *':
                continue

            a.balance = Decimal(FrenchTransaction.clean_amount(line[self.COL_BALANCE]))
            a.currency = a.get_currency(line[self.COL_BALANCE])
            a.type = self.get_account_type(a.label)
            if line[self.COL_HISTORY] == 'true':
                a._inv = False
                a._link = self.get_history_link()
                a._args = {'_eventId':         'clicDetailCompte',
                           '_ipc_eventValue':  '',
                           '_ipc_fireEvent':   '',
                           'deviseAffichee':   'DEVISE',
                           'execution':        self.get_execution(),
                           'idCompteClique':   line[self.COL_ID],
                          }
            else:
                a._inv = True
                a._args = {'_ipc_eventValue':  line[self.COL_ID],
                           '_ipc_fireEvent':   line[self.COL_FIRE_EVENT],
                          }
                a._link = self.document.xpath('//form[@name="changePageForm"]')[0].attrib['action']

            # Compare the type constant by value: identity comparison only
            # works by accident of object caching.
            if a.type == Account.TYPE_CARD:
                # For cards the displayed amount is moved to "coming" and the
                # balance is reset to zero.
                a.coming = a.balance
                a.balance = Decimal('0.0')

            accounts.append(a)

        return accounts

Пример #2

0

Показать файл

Файл: pages.py Проект: skeptycal/weboob-devel

    def get_list(self):
        """Return an iterator over the accounts listed in the page scripts.

        The rows come from the ``_data = new Array(...)`` JavaScript
        statement.  Rows whose id contains ``_CarteVisa`` are not returned as
        accounts: their arguments and balance are attached to the account
        that precedes them in the list.

        :raises BrokenPageError: when the array cannot be found in the scripts
        """
        raw = self.get_from_js('_data = new Array(', ');', is_list=True)
        if raw is None:
            raise BrokenPageError('Unable to find accounts list in scripts')

        # Single quotes are swapped so the JS literal parses as JSON.
        rows = json.loads('[%s]' % raw.replace("'", '"'))

        accounts = []
        for row in rows:
            account = Account()
            account.id = row[self.COL_ID].replace(' ', '')

            # The label cell is an HTML fragment which has to be parsed.
            encoded_label = unicode(row[self.COL_LABEL]).encode(self.browser.ENCODING)
            label_doc = self.parser.parse(StringIO(encoded_label), self.browser.ENCODING)
            label_div = label_doc.xpath('//div[@class="libelleCompteTDB"]')[0]
            account.label = self.parser.tocleanstring(label_div)

            raw_balance = row[self.COL_BALANCE]
            account.balance = Decimal(FrenchTransaction.clean_amount(raw_balance))
            account.currency = account.get_currency(raw_balance)
            account.type = self.get_account_type(account.label)
            account._link = self.get_history_link()

            account._args = {
                '_eventId': 'clicDetailCompte',
                '_ipc_eventValue': '',
                '_ipc_fireEvent': '',
                'deviseAffichee': 'DEVISE',
                'execution': self.get_execution(),
                'idCompteClique': row[self.COL_ID],
            } if row[self.COL_HISTORY] == 'true' else None

            if '_CarteVisa' in account.id:
                # Card row: merge it into the previously listed account.
                owner = accounts[-1]
                owner._card_ids.append(account._args)
                if not owner.coming:
                    owner.coming = Decimal('0.0')
                owner.coming += account.balance
            else:
                account._card_ids = []
                accounts.append(account)

        return iter(accounts)

Пример #3

0

Показать файл

Файл: pages.py Проект: skeptycal/weboob-devel

    def set_video_metadata(self, video):
        """Fill *video* from the ``<meta>`` tags found in the page ``<head>``.

        Title, author and thumbnail are always read.  Duration and
        description are optional: a BrokenPageError raised while selecting
        their tag is handled here.

        :param video: video object whose fields are set in place
        :raises BrokenPageError: when the duration has more than three
                                 colon-separated parts
        """
        head = self.parser.select(self.document.getroot(), 'head', 1)

        video.title = unicode(
            self.parser.select(head, 'meta[property="og:title"]',
                               1).get("content")).strip()
        video.author = unicode(
            self.parser.select(head, 'meta[name="author"]',
                               1).get("content")).strip()

        url = unicode(
            self.parser.select(head, 'meta[property="og:image"]',
                               1).get("content")).strip()
        # remove the useless anti-caching
        # (raw string: '\?' and '\d' are not valid string escapes)
        url = re.sub(r'\?\d+', '', url)
        video.thumbnail = BaseImage(url)
        video.thumbnail.url = video.thumbnail.id

        try:
            parts = self.parser.select(head, 'meta[property="video:duration"]',
                                       1).get("content").strip().split(':')
        except BrokenPageError:
            # it's probably a live, np.
            video.duration = NotAvailable
        else:
            # Accepted shapes: "S", "M:S" or "H:M:S".
            if len(parts) == 1:
                seconds = parts[0]
                hours = minutes = 0
            elif len(parts) == 2:
                minutes, seconds = parts
                hours = 0
            elif len(parts) == 3:
                hours, minutes, seconds = parts
            else:
                raise BrokenPageError('Unable to parse duration %r' % parts)
            video.duration = datetime.timedelta(hours=int(hours),
                                                minutes=int(minutes),
                                                seconds=int(seconds))

        try:
            video.description = html2text(
                self.parser.select(head, 'meta[property="og:description"]',
                                   1).get("content")).strip() or unicode()
        except BrokenPageError:
            video.description = u''

Пример #4

0

Показать файл

    def go_on_accounts_list(self):
        """Navigate to the accounts list, trying each known task id in turn.

        The first id in ``ACCOUNT_URLS`` that does not lead to an error page
        is kept as the only entry of ``ACCOUNT_URLS``.  When the resulting
        page is the short list, its first form is submitted to get the full
        one.

        :raises BrokenPageError: when every id leads to an error page
        """
        def start_task(oid):
            # The URL is rebuilt on every call so the current token is used.
            self.location(self.buildurl('/cyber/internet/StartTask.do', taskInfoOID=oid, token=self.token))

        reached = False
        for taskInfoOID in self.ACCOUNT_URLS:
            start_task(taskInfoOID)
            if self.page.is_error():
                continue
            if self.page.pop_up():
                self.logger.debug('Popup displayed, retry')
                start_task(taskInfoOID)
            self.ACCOUNT_URLS = [taskInfoOID]
            reached = True
            break

        if not reached:
            raise BrokenPageError('Unable to go on the accounts list page')

        if not self.page.is_short_list():
            return

        self.select_form(nr=0)
        self.set_all_readonly(False)
        self['dialogActionPerformed'] = 'EQUIPEMENT_COMPLET'
        self['token'] = self.page.build_token(self['token'])
        self.submit()

Пример #5

0

Показать файл

    def iter_videos(self):
        """Yield a CappedVideo for each result block of the search page.

        Nothing is yielded when the search box reports 'No Results Found'.

        :raises BrokenPageError: when a duration has more than three
                                 colon-separated parts
        """
        root = self.document.getroot()

        # When no results are found, the website returns random results
        searchbox = self.parser.select(root, 'div.search form input.searchbox', 1)
        if searchbox.value == 'No Results Found':
            return

        # Extract the metadata of every result block
        for block in self.parser.select(self.document.getroot(), 'div.vidBackdrop    '):
            href = self.parser.select(block, 'a', 1).attrib['href']
            _id = href[2:]

            video = CappedVideo(_id)
            video.set_empty_fields(NotAvailable, ('url',))

            title_link = self.parser.select(block, 'div.vidTitle a', 1)
            video.title = to_unicode(title_link.text)
            author_link = self.parser.select(block, 'div.vidAuthor a', 1)
            video.author = to_unicode(author_link.text)

            video.thumbnail = BaseImage('http://cdn.capped.tv/pre/%s.png' % _id)
            video.thumbnail.url = to_unicode(video.thumbnail.id)

            # The info block text starts with 7 characters to skip, followed
            # by the duration and then other space-separated fields.
            info = self.parser.select(block, 'div.vidInfo', 1)
            parts = info.text[7:].split(' ')[0].split(':')
            if len(parts) > 3:
                raise BrokenPageError('Unable to parse duration %r' % info)
            # Left-pad with zeros so that "S" and "M:S" unpack like "H:M:S".
            hours, minutes, seconds = [0] * (3 - len(parts)) + parts

            video.duration = datetime.timedelta(hours=int(hours), minutes=int(minutes), seconds=int(seconds))

            yield video

Пример #6

0

Показать файл

    def get_history(self):
        """Yield the transactions listed in the operations table of the page.

        Nothing is yielded for pages holding a ``table-dat`` or an
        ``InfosPortefeuille`` table, nor when the page shows a
        ``td.no-result`` cell instead of a table.

        :raises BrokenPageError: when neither a table nor the no-result cell
                                 is found
        """
        def cell_text(cell):
            # Concatenation of every stripped text node of the cell.
            return u''.join([txt.strip() for txt in cell.itertext()])

        # DAT account can't have transaction
        if self.document.xpath('//table[@id="table-dat"]'):
            return
        # These accounts have investments, no transactions
        if self.document.xpath('//table[@id="InfosPortefeuille"]'):
            return

        # Try the known table markups in order, stopping at the first match.
        tables = (self.document.xpath('//table[@id="table-detail-operation"]')
                  or self.document.xpath('//table[@id="table-detail"]')
                  or self.document.getroot().cssselect('table.table-detail'))

        if not tables:
            try:
                self.parser.select(self.document.getroot(), 'td.no-result', 1)
            except BrokenPageError:
                raise BrokenPageError('Unable to find table?')
            return

        for row in tables[0].xpath('.//tr'):
            cells = row.findall('td')
            if len(cells) < 4:
                continue

            date = cell_text(cells[self.COL_DATE])
            raw = cell_text(cells[self.COL_TEXT])
            debit = self.parse_number(cell_text(cells[self.COL_DEBIT]))
            credit = self.parse_number(cell_text(cells[self.COL_CREDIT]))

            transaction = Transaction()
            transaction.parse(date, re.sub(r'[ ]+', ' ', raw))
            transaction.set_amount(credit, debit)

            yield transaction

Пример #7

0

Показать файл

Файл: pages.py Проект: linura/weboob

    def get_video(self, video=None):
        """Fill a video object from the current page.

        The title comes from the first ``<h1>``, the description from the
        ``description`` meta tag and the URL from the first quoted
        ``http://....mp4`` string found in a script.

        :param video: existing video to fill; a new JacquieEtMichelVideo is
                      created from the page id when it is None
        :returns: the filled video
        :raises BrokenPageError: when no script contains a video URL
        """
        _id = to_unicode(self.group_dict['id'])
        if video is None:
            video = JacquieEtMichelVideo(_id)
        title_el = self.parser.select(self.document.getroot(), 'h1', 1)
        video.title = to_unicode(title_el.text.strip())
        video.description = self.document.xpath(
            '//meta[@name="description"]')[0].attrib['content']

        for script in self.document.xpath('.//script'):
            if script.text is None:
                continue
            # The dot before "mp4" is escaped so that only a real ".mp4"
            # extension matches (an unescaped dot accepts any character).
            m = re.search(r'"(http://[^"]+\.mp4)"', script.text)
            if m:
                video.url = to_unicode(m.group(1))
                break

        if not video.url:
            raise BrokenPageError('Unable to find URL')

        video.set_empty_fields(NotAvailable)

        return video

Пример #8

0

Показать файл

Файл: browser.py Проект: yang2lalang/weboob

    def get_content(self, _id):
        """Fetch the article or comment designated by *_id*.

        :param _id: identifier handed to ``parse_id`` to obtain the URL
        :returns: the content object, or None when ``parse_id`` gives no URL
        :raises BrokenPageError: when the browser lands on neither a comment
                                 page nor a content page
        """
        url, _id = self.parse_id(_id)
        if url is None:
            return None

        self.location(url)
        self.page.url = self.absurl(url)

        if self.is_on_page(CommentPage):
            content = self.page.get_comment()
        elif not self.is_on_page(ContentPage):
            raise BrokenPageError('Not on a content or comment page (%r)' % self.page)
        else:
            # A "#comment-<n>" fragment selects one comment of the article.
            anchor = re.match('.*#comment-(\d+)$', url)
            if anchor:
                content = self.page.get_comment(int(anchor.group(1)))
            else:
                content = self.page.get_article()

        if _id is not None:
            content.id = _id
        return content

Пример #9

0

Показать файл

Файл: search.py Проект: sourcery-ai-bot/weboob

    def iter_videos(self):
        """Yield an InaVideo for each ``<li>`` of the results list.

        A BrokenPageError raised while selecting the results list is taken
        to mean that the search returned nothing.

        :raises BrokenPageError: when a duration does not match the expected
                                 ``[<h>h][<m>min]<s>s`` format
        """
        try:
            results = self.parser.select(self.document.getroot(),
                                         'div.container-videos ul', 1)
        except BrokenPageError:
            # It means there are no results.
            return

        for item in results.findall('li'):
            image = item.find('a').find('img')
            thumb_path = image.attrib['src']

            video = InaVideo(re.sub(self.URL_REGEXP, r'\2', thumb_path))

            video.thumbnail = BaseImage(u'http://boutique.ina.fr%s' % thumb_path)
            video.thumbnail.url = video.thumbnail.id

            # The title is poorly encoded in the source, we have to
            # encode/decode it again
            raw_title = unicode(self.parser.select(item, 'p.titre', 1).text)
            video.title = raw_title.encode('raw_unicode_escape').decode('utf8')

            # Dates are written day/month/year.
            date_text = self.parser.select(item, 'p.date', 1).text
            day, month, year = (int(field) for field in date_text.split('/'))
            video.date = datetime.datetime(year, month, day)

            duration = self.parser.select(item, 'p.duree', 1).text
            found = re.match(r'((\d+)h)?((\d+)min)?(\d+)s', duration)
            if not found:
                raise BrokenPageError('Unable to match duration (%r)' %
                                      duration)
            # Hours and minutes are optional groups and default to zero.
            video.duration = datetime.timedelta(
                hours=int(found.group(2) or 0),
                minutes=int(found.group(4) or 0),
                seconds=int(found.group(5)))

            yield video

Пример #10

0

Показать файл

Файл: pages.py Проект: dasimon/weboob

    def iter_accounts(self, next_pages):
        """Yield one card Account per group of rows of the ``TabCtes`` table.

        A row with a non-empty id cell starts a new account; the following
        rows with an empty id cell are attached to that account.  An account
        is yielded when the next one starts, and the last one after the loop.
        Once every account has been yielded, the 'Retour' button (if any) is
        triggered.

        :param next_pages: not used in this method body
        :raises BrokenPageError: when a row without id comes before any
                                 account
        """
        params = self.get_params()

        account = None
        currency = None
        # The currency is taken from the first header cell whose text ends
        # with a parenthesized word.
        # NOTE(review): th.text is passed to re.match as is; this presumably
        # assumes every header cell has leading text - confirm.
        for th in self.document.xpath('//table[@id="TabCtes"]//thead//th'):
            m = re.match('.*\((\w+)\)$', th.text)
            if m and currency is None:
                currency = Account.get_currency(m.group(1))

        for tr in self.document.xpath('//table[@id="TabCtes"]/tbody/tr'):
            cols = tr.xpath('./td')

            id = self.parser.tocleanstring(cols[self.COL_ID])
            if len(id) > 0:
                # New account: hand over the previous one before replacing it.
                if account is not None:
                    yield account
                account = Account()
                account.id = id.replace(' ', '')
                account.type = Account.TYPE_CARD
                account.balance = account.coming = Decimal('0')
                # Initial values, overwritten below by the dates of the rows.
                account._next_debit = datetime.date.today()
                account._prev_debit = datetime.date(2000, 1, 1)
                account.label = u' '.join([
                    self.parser.tocleanstring(cols[self.COL_TYPE]),
                    self.parser.tocleanstring(cols[self.COL_LABEL])
                ])
                account.currency = currency
                account._params = None
                account._invest_params = None
                # Form parameters selecting this row: the row id is split at
                # its first underscore into a key suffix and a value.
                account._coming_params = params.copy()
                account._coming_params[
                    'dialogActionPerformed'] = 'SELECTION_ENCOURS_CARTE'
                account._coming_params[
                    'attribute($SEL_$%s)' %
                    tr.attrib['id'].split('_')[0]] = tr.attrib['id'].split(
                        '_', 1)[1]
            elif account is None:
                raise BrokenPageError('Unable to find accounts on cards page')
            else:
                # Row without id: its selection parameters are stored on the
                # current account as _params.
                account._params = params.copy()
                account._params[
                    'dialogActionPerformed'] = 'SELECTION_ENCOURS_CARTE'
                account._params[
                    'attribute($SEL_$%s)' %
                    tr.attrib['id'].split('_')[0]] = tr.attrib['id'].split(
                        '_', 1)[1]

            date_col = self.parser.tocleanstring(cols[self.COL_DATE])
            m = re.search('(\d+)/(\d+)/(\d+)', date_col)
            if not m:
                self.logger.warning('Unable to parse date %r' % date_col)
                continue

            # The three groups are reversed to build date(year, month, day).
            # NOTE(review): reversed() needs a sequence, so this relies on
            # map() returning a list (Python 2 semantics).
            date = datetime.date(*reversed(map(int, m.groups())))
            # Two-digit years are taken as 20xx.
            if date.year < 100:
                date = date.replace(year=date.year + 2000)

            amount = Decimal(
                FrenchTransaction.clean_amount(
                    self.parser.tocleanstring(cols[self.COL_AMOUNT])))

            # NOTE(review): the meaning of the '(1)' marker in the date cell
            # is not visible here - confirm against the website.
            if not date_col.endswith('(1)'):
                # debited
                account.coming += -abs(amount)
                account._next_debit = date
            elif date > account._prev_debit:
                # Keep the most recent of the rows marked with '(1)'.
                account._prev_balance = -abs(amount)
                account._prev_debit = date

        if account is not None:
            yield account

        # Needed to preserve navigation.
        btn = self.document.xpath('.//button/span[text()="Retour"]')
        if len(btn) > 0:
            btn = btn[0].getparent()
            actions = self.get_button_actions()
            _params = params.copy()
            _params.update(actions[btn.attrib['id']])
            self.browser.openurl('/cyber/internet/ContinueTask.do',
                                 urllib.urlencode(_params))

Пример #11

0

Показать файл

    def get_list(self):
        """Yield the accounts described by the ``LGNTableRow`` rows of the page.

        Each ``<td>`` is dispatched on its ``headers`` attribute.  Rows left
        without a label or with an empty balance are skipped.

        :raises BrokenPageError: when a balance cannot be converted to a
                                 Decimal
        """
        # Website crashes when going on theses URLs
        crashing_prefixes = (
            '/restitution/cns_detailAVPAT.html',
            '/restitution/cns_detailPea.html',
            '/restitution/cns_detailAlterna.html',
        )

        def check_valid_url(url):
            return not url.startswith(crashing_prefixes)

        for tr in self.document.getiterator('tr'):
            row_classes = tr.attrib.get('class', '').split()
            if 'LGNTableRow' not in row_classes:
                continue

            account = Account()
            for td in tr.getiterator('td'):
                header = td.attrib.get('headers', '')

                if header == 'TypeCompte':
                    link = td.find('a')
                    if link is None:
                        # No link: stop reading the cells of this row.
                        break
                    account.label = self.parser.tocleanstring(link)
                    account._link_id = link.get('href', '')
                    for prefix, actype in self.TYPES.iteritems():
                        if account.label.startswith(prefix):
                            account.type = actype
                            break
                    else:
                        # No label prefix matched: fall back on the link.
                        if account._link_id.startswith('/asv/asvcns10.html'):
                            account.type = Account.TYPE_LIFE_INSURANCE
                    if not check_valid_url(account._link_id):
                        account._link_id = None

                elif header == 'NumeroCompte':
                    number = self.parser.tocleanstring(td)
                    account.id = number.replace(u'\xa0', '')

                elif header == 'Libelle':
                    label = self.parser.tocleanstring(td)
                    if label != '':
                        account.label = label

                elif header == 'Solde':
                    cells = td.xpath('./div[@class="Solde"]')
                    if not cells:
                        continue
                    amount = self.parser.tocleanstring(cells[0])
                    if not amount or amount in ('ANNULEE', 'OPPOSITION'):
                        account.balance = NotAvailable
                        continue
                    try:
                        account.balance = Decimal(
                            FrenchTransaction.clean_amount(amount))
                    except InvalidOperation:
                        raise BrokenPageError(
                            'Unable to parse balance %r' % amount)
                    account.currency = account.get_currency(amount)

            if not account.label or empty(account.balance):
                continue

            if account._link_id and 'CARTE_' in account._link_id:
                account.type = account.TYPE_CARD

            if account.type == Account.TYPE_UNKNOWN:
                self.logger.debug('Unknown account type: %s', account.label)

            yield account

Пример #12

0

Показать файл

    def get_video(self, video=None):
        """Fill a GDCVaultVideo with the data found on the current play page.

        Several page layouts are handled (numbered as in the inline
        comments): types 1 and 2 embed a SWF player configured from a script,
        type 3 is an iframe on ``/play/mediaProxy.php`` (PDF slides) and
        type 4 is an iframe on a player whose XML config file describes the
        streams.

        :param video: existing video object to fill; a new GDCVaultVideo is
                      created from the page id when it is None
        :returns: the filled video, or None when the page layout is not
                  recognized
        :raises BrokenPageError: when the ``akamaihost`` tag of the XML
                                 config file has no text
        """
        def resolve_redirect(url):
            # Request *url* with redirects disabled and return the Location
            # of a 302 answer, or *url* unchanged otherwise.  Redirect
            # handling is restored whatever happens during the request.
            # HACK: we use mechanize directly here for now... FIXME
            self.browser.set_handle_redirect(False)
            try:
                self.browser.open_novisit(url)
            except HTTPError as e:
                if e.getcode() == 302 and hasattr(e, 'hdrs'):
                    return unicode(e.hdrs['Location'])
            finally:
                self.browser.set_handle_redirect(True)
            return url

        # check for slides id variant
        want_slides = False
        m = re.match('.*#slides', self.url)
        if m:
            want_slides = True
            # not sure it's safe
            self.group_dict['id'] += '#slides'

        if video is None:
            video = GDCVaultVideo(self.group_dict['id'])

        # the config file has it too, but in CDATA and only for type 4
        obj = self.parser.select(self.document.getroot(), 'title')
        title = None
        if len(obj) > 0:
            try:
                title = unicode(obj[0].text)
            except UnicodeDecodeError:
                title = None

        if title is None:
            obj = self.parser.select(self.document.getroot(),
                                     'meta[name=title]')
            if len(obj) > 0:
                if 'content' in obj[0].attrib:
                    try:
                        # FIXME: 1013483 has buggus title (latin1)
                        # for now we just pass it as-is
                        title = obj[0].attrib['content']
                    except UnicodeDecodeError:
                        # XXX: this doesn't even works!?
                        # (the codec name used to be misspelled)
                        title = obj[0].attrib['content'].decode('iso-8859-15')

        if title is not None:
            title = title.strip()
            m = re.match(r'GDC Vault\s+-\s+(.*)', title)
            if m:
                title = m.group(1)
            video.title = title

        #TODO: POST back the title to /search.php and filter == id to get
        # cleaner (JSON) data... (though it'd be much slower)

        # try to find an iframe (type 3 and 4)
        obj = self.parser.select(self.document.getroot(), 'iframe')
        if len(obj) == 0:
            # type 1 or 2 (swf+js)
            # find which script element contains the swf args
            for script in self.parser.select(self.document.getroot(),
                                             'script'):
                m = re.match(
                    r".*new SWFObject.*addVariable\('type', '(.*)'\).*",
                    unicode(script.text), re.DOTALL)
                if m:
                    video.ext = m.group(1)

                m = re.match(
                    r'.*new SWFObject.*addVariable\("file", encodeURIComponent\("(.*)"\)\).*',
                    unicode(script.text), re.DOTALL)
                if m:
                    video.url = "http://gdcvault.com%s" % (m.group(1))
                    # TODO: for non-free (like 769),
                    # must be logged to use /mediaProxy.php

                    # FIXME: doesn't seem to work yet, we get 2 bytes as html
                    # 769 should give:
                    # http://twvideo01.ubm-us.net/o1/gdcradio-net/2007/gdc/GDC07-4889.mp3
                    video.url = resolve_redirect(video.url)

                    video.set_empty_fields(NotAvailable)
                    return video

            #XXX: raise error?
            return None

        obj = obj[0]
        if obj is None:
            return None
        # type 3 or 4 (iframe)
        # get the config file for the rest
        iframe_url = obj.attrib['src']

        # 1015020 has a boggus url
        m = re.match('http:/event(.+)', iframe_url)
        if m:
            iframe_url = 'http://event' + m.group(1)

        # 1013798 has player169.html
        # 1012186 has player16x9.html
        # some other have /somethingplayer.html...
        # 1441 has a space in the xml filename, which we must not strip
        m = re.match(
            r'(http:.*/)[^/]*player[0-9a-z]*\.html\?.*xmlURL=([^&]+\.xml).*\&token=([^& ]+)',
            iframe_url)

        if not m:
            m = re.match(r'/play/mediaProxy\.php\?sid=(\d+)', iframe_url)
            if m is None:
                return None
            # TODO: must be logged to use /mediaProxy.php
            # type 3 (pdf slides)
            video.ext = u'pdf'
            video.url = "http://gdcvault.com%s" % (unicode(iframe_url))
            video.url = resolve_redirect(video.url)

            video.set_empty_fields(NotAvailable)
            return video

        # type 4 (dual screen video)

        # token doesn't actually seem required
        # 1441 has a space in the xml filename
        xml_filename = urllib.quote(m.group(2))
        config_url = m.group(1) + xml_filename + '?token=' + m.group(3)

        # TODO: fix for 1015021 & others (forbidden)
        config = self.browser.get_document(self.browser.openurl(config_url))

        obj = self.parser.select(config.getroot(), 'akamaihost', 1)
        host = obj.text
        if host is None:
            raise BrokenPageError('Missing tag in xml config file')

        if host == "smil":
            # the rtmp URL is described in a smil file,
            # with several available bitrates
            obj = self.parser.select(config.getroot(), 'speakervideo', 1)
            smil = self.browser.get_document(self.browser.openurl(obj.text))
            obj = self.parser.select(smil.getroot(), 'meta', 1)
            # TODO: error checking
            base = obj.attrib.get('base', '')
            best_bitrate = 0
            path = None
            # choose the best bitrate
            for o in self.parser.select(smil.getroot(), 'video'):
                rate = int(o.attrib.get('system-bitrate', 0))
                if rate > best_bitrate:
                    # Remember the rate too, otherwise the last entry with a
                    # non-zero bitrate would win instead of the best one.
                    best_bitrate = rate
                    path = o.attrib.get('src', '')
            # Without any usable entry the URL is left unset instead of
            # failing on the concatenation with None.
            if path is not None:
                video.url = unicode(base + '/' + path)

        else:
            # not smil, the rtmp url is directly here as host + path
            # for id 1373 host is missing '/ondemand'
            # only add it when only a domain is specified without path
            m = re.match(r'^[^\/]+$', host)
            if m:
                host += "/ondemand"

            videos = {}

            obj = self.parser.select(config.getroot(), 'speakervideo', 1)
            if obj.text is not None:
                videos['speaker'] = 'rtmp://' + host + '/' + urllib.quote(
                    obj.text)

            obj = self.parser.select(config.getroot(), 'slidevideo', 1)
            if obj.text is not None:
                videos['slides'] = 'rtmp://' + host + '/' + urllib.quote(
                    obj.text)

            # XXX
            if 'speaker' in videos:
                video.url = unicode(videos['speaker'])
            elif 'slides' in videos:
                # 1016627 only has slides, so fallback to them
                video.url = unicode(videos['slides'])

            if want_slides:
                if 'slides' in videos:
                    video.url = unicode(videos['slides'])
            # if video.url is none: raise ? XXX

        obj = self.parser.select(config.getroot(), 'date', 1)
        if obj.text is not None:
            # 1016634 has "Invalid Date"
            try:
                video.date = parse_dt(obj.text)
            except ValueError:
                video.date = NotAvailable

        obj = self.parser.select(config.getroot(), 'duration', 1)
        # An empty duration tag has no text: match against '' in that case.
        m = re.match(r'(\d\d):(\d\d):(\d\d)', obj.text or '')
        if m:
            video.duration = datetime.timedelta(hours=int(m.group(1)),
                                                minutes=int(m.group(2)),
                                                seconds=int(m.group(3)))

        # The result is not used yet.
        # NOTE(review): the call is kept because selecting with a count of 1
        # presumably raises when the tag is missing - confirm before removing.
        self.parser.select(config.getroot(), 'speaker', 1)

        #self.set_details(video)

        video.set_empty_fields(NotAvailable)
        return video

Пример #13

0

Показать файл

    def get_accounts(self):
        """Return a dict mapping account id to Account, one per currency.

        The primary account is read from the head balance of the page.  When
        the multiple-currencies table is present, its rows become additional
        accounts and the primary balance is replaced by the primary currency
        balance.

        :raises BrokenPageError: when only one of the multi-currency balance
                                 and the currencies table is present
        """
        accounts = {}
        content = self.document.xpath(
            '//div[@id="main"]//div[@class="col first"]')[0]
        username = self.browser.username

        # Primary currency account
        primary = Account()
        primary.type = Account.TYPE_CHECKING

        # Total currency balance.
        # If there are multiple currencies, this balance is all currencies
        # converted to the main currency.
        try:
            nodes = (content.xpath('.//h3/span[@class="balance"]')
                     or content.xpath('.//li[@class="balance"]//span/strong'))
            total = nodes[0].text_content().strip()
            primary.balance = AmTr.decimal_amount(total)
            primary.currency = Account.get_currency(total)
            primary.id = unicode(primary.currency)
            primary.label = u'%s %s*' % (username, total.split()[-1])
        except IndexError:
            primary.balance = NotAvailable
            primary.label = u'%s' % (username)
        accounts[primary.id] = primary

        # The following code will only work if the user enabled multiple
        # currencies.
        head_balance = content.xpath(
            './/div[@class="body"]//ul/li[@class="balance"]/span')
        rows = content.xpath('.//table[@id="balanceDetails"]//tbody//tr')

        # sanity check: both must be present, or both absent
        if bool(head_balance) != bool(rows):
            raise BrokenPageError(
                'Unable to find all required multiple currency entries')

        # Primary currency balance.
        # If the user enabled multiple currencies, we get this one instead.
        # An Account object has only one currency; secondary currencies
        # should be other accounts.
        if head_balance:
            amount = head_balance[0].text_content().strip()
            primary.balance = AmTr.decimal_amount(amount)
            # The primary currency of the "head balance" is the same; ensure
            # we got the right one
            assert primary.currency == primary.get_currency(
                amount)

        for row in rows:
            amount = row.xpath('.//td')[-1].text_content().strip()
            secondary = Account()
            secondary.type = Account.TYPE_CHECKING
            # XXX it ignores 5+ devises, so it's bad, but it prevents a
            # crash, cf #1216
            try:
                secondary.balance = AmTr.decimal_amount(amount)
            except InvalidOperation:
                continue
            secondary.currency = Account.get_currency(amount)
            secondary.id = unicode(secondary.currency)
            secondary.label = u'%s %s' % (username, amount.split()[-1])

            if secondary.id == primary.id:
                # Same currency as the primary account: only cross-check it.
                assert secondary.balance == primary.balance
                assert secondary.currency == primary.currency
            elif secondary.currency:
                accounts[secondary.id] = secondary

        return accounts

Python BrokenPageError.BrokenPageError примеры использования