Python BeautifulSoup.find примеры использования

Язык программирования: Python

Пространство имен/Пакет: gotovienna.BeautifulSoup

Класс/Тип: BeautifulSoup

Метод/Функция: find

Примеров на hotexamples.com: 3

Python BeautifulSoup.find — найдено 3 примера. Это лучшие примеры кода на Python для gotovienna.BeautifulSoup.BeautifulSoup.find, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

findAll(4)

find(3)

Основные методы

findAll (4)

find (3)

Пример #1

Показать файл

Файл: routing.py Проект: kelvan/gotoVienna

class sParser:
    """Parser for the search response page.

    Wraps the response HTML in a BeautifulSoup tree and offers helpers to
    classify the page and to extract the correction choices it contains.
    """

    # Result key -> id of the <select> element holding its options.
    _CORRECTION_SELECTS = (
        ('origin', 'nameList_origin'),
        ('destination', 'nameList_destination'),
        ('place_origin', 'placeList_origin'),
        ('place_destination', 'placeList_destination'),
    )

    def __init__(self, html):
        """Parse ``html`` into ``self.soup``."""
        self.soup = BeautifulSoup(html)

    def check_page(self):
        """Classify the page.

        Returns ``PageType.RESULT`` when the ``form_efaresults`` form is
        present, ``PageType.CORRECTION`` when a ``form_error`` div is
        present, and ``PageType.UNKNOWN`` otherwise.
        """
        if self.soup.find('form', {'id': 'form_efaresults'}):
            return PageType.RESULT

        if self.soup.find('div', {'class': 'form_error'}):
            return PageType.CORRECTION

        return PageType.UNKNOWN

    state = property(check_page)

    def get_correction(self):
        """Collect the option texts of every correction select on the page.

        Returns a dict that maps ``'origin'``, ``'destination'``,
        ``'place_origin'`` and ``'place_destination'`` to the list of
        option texts of the matching select. Keys whose select is absent
        are omitted.

        Raises ``ParserError`` when none of the selects is found.
        """
        corrections = {}
        for key, select_id in self._CORRECTION_SELECTS:
            select = self.soup.find('select', {'id': select_id})
            if select:
                # Each key reads from its own select; the place lists must
                # not be filled from the name lists.
                corrections[key] = [option.text
                                    for option in select.findAll('option')]

        if not corrections:
            raise ParserError('Unable to parse html')

        return corrections

    def get_result(self):
        """Return an ``rParser`` built from the serialized soup."""
        return rParser(str(self.soup))

Пример #2

Показать файл

Файл: realtime.py Проект: kelvan/gotoVienna

    def parse_departures(self, html):
        bs = BeautifulSoup(html)
        dep = []

        # Check for error messages
        msg = bs.findAll('span', {'class': 'rot fett'})
        if msg and len(msg) > 0 and unicode(msg[0].text).find(u'technischen St') > 0:
            print '\n'.join(map(lambda x: x.text.replace('&nbsp;', ''), msg))
            return []
        
        errtable = bs.find('table', {'class':'errortable'})
        if errtable and clean_text(errtable.text):
            print "Errortable found"
            print errtable.text
            return []

        if bs.table and bs.table.tr:
            st_td = bs.table.tr.findAll('td')
        
            if st_td:
                station = clean_text(st_td[-1].text)
            else:
                print "Unexpected Error: Stationname not found"
                print "Debug:", st_td.encode('UTF-8')
        else:
            print "Unexpected Error: table or tr not found"
            print bs
            return []
        
        # zusatztext crap
        zt = bs.find('td', {'class':'zusatztext'})
        if zt:
            ma = ZUSATZTEXT_REGEX.search(zt.text)
            if ma:
                line = ma.group(1)
                direction = ma.group(2)
                if direction == direction.upper():
                    direction = direction.capitalize()
                tim = int(ma.group(3))
                d = Departure(line=line, direction=direction,
                              lowfloor=True, station=station, time=tim)
                dep.append(d)
            else:
                print zt.text
        
        table = bs.find('table', {'class':'imagetable'})
        if not table:
            print "table not found"
            return []
        
        if errtable:
            print "Warning: Empty errortable found"
            return dep
        
        trs = table.findAll('tr')
        
        for tr in trs[1:]:
            tds = tr.findAll('td')
            line = clean_text(tds[0].text)
            direction = clean_text(tds[1].text)
            
            if direction.startswith(line):
                direction = direction.lstrip(line).strip()
                
            if direction == direction.upper():
                direction = direction.capitalize()
            
            lf_img = tds[-1].img
            
            lowfloor = lf_img and lf_img.has_key('alt')
            
            d = {'line': line,
                 'direction': direction,
                 'lowfloor': lowfloor,
                 'station': station}

            # parse time
            tim = clean_text(tds[2].text)
            dts = DELTATIME_REGEX.search(tim)
            abs = ABSTIME_REGEX.search(tim)
            
            if tim.find(u'...in K\xfcrze') >= 0:
                d['time'] = 0
            elif abs:
                d['time'] = calc_datetime(abs.group(1))
            elif tim.isdigit():
                d['time'] = int(tim)
            elif dts:
                # is timedelta
                d['time'] = int(dts.group(1))
            else:
                print "Error parsing time:", tim
                continue

            dep.append(Departure(**d))

        return dep

Пример #3

Показать файл

Файл: routing.py Проект: kelvan/gotoVienna

class rParser:
    """Parser for routing results.

    Wraps the result HTML in a BeautifulSoup tree and lazily exposes the
    parsed ``overview`` and ``details`` of the routes it contains.
    Iterating over an instance yields the entries of ``details``.
    """

    # Building blocks of the detail-mode time cell: two times, each with an
    # optional "dd.mm" date in front.
    _DATE_PATTERN = r'\d\d\.\d\d'
    _TIME_PATTERN = r'\d\d:\d\d'
    _DETAIL_TIMESPAN_PATTERN = (
        r'\s*(?P<date1>{date})?\s*(?P<time1>{time})'
        r'\s*(?P<date2>{date})?\s*(?P<time2>{time})\s*'
    ).format(date=_DATE_PATTERN, time=_TIME_PATTERN)

    def __init__(self, html):
        """Parse ``html`` into ``self.soup`` and reset the lazy caches."""
        self.soup = BeautifulSoup(html)
        self._overview = None
        self._details = None

    @classmethod
    def get_tdtext(cls, x, cl):
        """Return the text of the ``td`` with class ``cl`` inside ``x``."""
        return x.find('td', {'class': cl}).text

    @classmethod
    def get_change(cls, x):
        """Return the ``col_change`` cell as int, or 0 when it is empty."""
        y = cls.get_tdtext(x, 'col_change')
        if y:
            return int(y)
        return 0

    @classmethod
    def get_price(cls, x):
        """Return the ``col_price`` cell as float.

        A decimal comma is accepted. An empty cell, ``'*'`` or any text that
        is not a number yields 0.0.
        """
        y = cls.get_tdtext(x, 'col_price')
        if not y or y == '*':
            return 0.0
        try:
            return float(y.replace(',', '.'))
        except ValueError:
            return 0.0

    @classmethod
    def get_date(cls, x):
        """Return the ``col_date`` cell (``dd.mm.YYYY``) as a date, or None."""
        y = cls.get_tdtext(x, 'col_date')
        if y:
            return datetime.strptime(y, '%d.%m.%Y').date()
        return None

    @staticmethod
    def _extract_datetime(gr, n):
        """Build the ``n``-th datetime from the detail-mode match groups.

        ``gr`` is the groupdict of ``_DETAIL_TIMESPAN_PATTERN``. When the
        ``date<n>`` group is missing, today's date is used; otherwise the
        current year is combined with the matched day and month.
        """
        date_text = gr.get('date%d' % n)
        time_text = gr['time%d' % n]

        day_offset = timedelta(0)
        # Strange times possible at wienerlinien: "24:00" is midnight of the
        # following day.
        if time_text == '24:00':
            time_text = '00:00'
            day_offset = timedelta(days=1)
        t = datetime.strptime(time_text, '%H:%M').time()

        if date_text:
            d = datetime.strptime(
                '%d %s' % (datetime.today().year, date_text),
                '%Y %d.%m').date()
        else:
            d = datetime.today().date()

        return datetime.combine(d + day_offset, t)

    @classmethod
    def get_datetime(cls, x):
        """Return ``[from_datetime, to_datetime]`` parsed from ``col_time``.

        Two layouts are handled:

        * overview mode, ``HH:MM-HH:MM``, combined with the row's
          ``col_date``; an end time earlier than the start time is placed
          on the following day;
        * detail mode, two times each optionally preceded by ``dd.mm``.

        Returns an empty list when the cell is empty or matches neither.
        """
        y = cls.get_tdtext(x, 'col_time')
        if not y:
            return []

        if y.find('-') > 0:
            # overview mode
            times = [time(*[int(part) for part in z.split(':')])
                     for z in y.split('-')]
            d = cls.get_date(x)
            from_dtime = datetime.combine(d, times[0])
            if times[0] > times[1]:
                # dateline crossing
                to_dtime = datetime.combine(d + timedelta(1), times[1])
            else:
                to_dtime = datetime.combine(d, times[1])
            return [from_dtime, to_dtime]

        # detail mode
        ma = re.match(cls._DETAIL_TIMESPAN_PATTERN, y)
        if not ma:
            return []

        gr = ma.groupdict()
        return [cls._extract_datetime(gr, 1), cls._extract_datetime(gr, 2)]

    def __iter__(self):
        # ``details`` is a property, so it must not be called.
        for detail in self.details:
            yield detail

    @staticmethod
    def _cell_strings(row, css_class):
        """Return the plain NavigableString children of a ``td`` in ``row``."""
        cell = row.find('td', {'class': css_class})
        # Exact type match on purpose: subclasses of NavigableString are
        # filtered out together with tags.
        return [node for node in cell.contents
                if type(node) == NavigableString]

    def _parse_details(self):
        """Parse every ``data_table tourdetail`` div into a list of steps."""
        tours = self.soup.findAll('div', {'class': 'data_table tourdetail'})

        return [
            [
                {
                    'timespan': rParser.get_datetime(row),
                    # the first two characters of each station string are
                    # dropped before stripping
                    'station': [s[2:].strip() for s in
                                self._cell_strings(row, 'col_station')],
                    'info': [s.strip() for s in
                             self._cell_strings(row, 'col_info')],
                }
                for row in tour.find('tbody').findAll('tr')
            ]
            for tour in tours  # all routes
        ]

    @property
    def details(self):
        """List of trip details, parsed on first access and then cached.

        [ [ { 'timespan': [datetime, datetime] or [],
              'station': [u'...', ...],
              'info': [u'...', ...]
            }, ... # next trip step
          ], ... # next trip possibility
        ]
        """
        if self._details is None:
            self._details = self._parse_details()

        return self._details

    def _parse_overview(self):
        """Parse the ``tbl_fahrten`` table into a list of row dicts.

        Raises ``ParserError`` when the table is missing or has no rows.
        """
        # get overview table
        table = self.soup.find('table', {'id': 'tbl_fahrten'})
        rows = table.findAll('tr') if table else []

        # check if there is an overview table
        if not rows:
            raise ParserError('Unable to parse overview')

        return [
            {
                'timespan': rParser.get_datetime(row),
                'change': rParser.get_change(row),
                'price': rParser.get_price(row),
            }
            for row in rows[1:]  # cut off headline
        ]

    @property
    def overview(self):
        """List of overview rows, parsed on first access and then cached.

        Each row is a dict containing
        timespan: [datetime, datetime] or []
        change: int
        price: float

        When parsing fails with AttributeError the serialized soup is
        written to DEBUGLOG and None is returned.
        """
        if self._overview is None:
            try:
                self._overview = self._parse_overview()
            except AttributeError:
                with open(DEBUGLOG, 'w') as f:
                    f.write(str(self.soup))

        return self._overview