Python tag_text示例，beautifulsoup.tag_text Python示例

示例#1

0

显示文件

文件： import_wff.py 项目： thuvh/filmmaster

    def parse_film(self, soup):
        """Collect the film details found on a parsed festival page.

        Reads both titles from the ``ps_belka`` element, the
        ``label: value`` rows, the screening entries under ``pokazy`` and
        the ``href`` of the parent of the first image inside ``ps_body``.

        Returns a dict with the keys ``title`` (from the ``h4``),
        ``title_localized`` (from the ``h2``), ``directors``, ``times``
        (one ``self.parse_t`` result per screening text), ``year`` and
        ``img_url`` (left falsy when no image is found).
        """
        def _split_row(row):
            # Only the first colon separates the label from its value.
            label, colon, value = tag_text(row).partition(':')
            if not colon:
                return None, None
            return label.strip(), value.strip()

        header = soup.find(ct('ps_belka'))
        localized_title = tag_text(header.find('h2'))
        original_title = tag_text(header.find('h4'))

        # Rows without a colon all land under the ``None`` key.
        props = {}
        for row in soup.findAll(ct('row')):
            label, value = _split_row(row)
            props[label] = value

        listing = soup.find(ct('pokazy'))
        # The first <li> is skipped -- presumably a heading; TODO confirm.
        entries = listing.findAll('li')[1:] if listing else []
        screening_texts = [tag_text(entry) for entry in entries]

        image = soup.find(ct('ps_body')).find('img')
        img_url = image.parent['href'] if image else image

        # Comma-separated names; blank pieces are filtered out.
        directors = filter(
            bool,
            [name.strip() for name in props.get(u'Reżyser', '').split(',')])
        times = [self.parse_t(text) for text in screening_texts]

        return {
            'title': original_title,
            'title_localized': localized_title,
            'directors': directors,
            'times': times,
            'year': props.get('Rok produkcji', None),
            'img_url': img_url,
        }

示例#2

0

显示文件

文件： import_lff.py 项目： thuvh/filmmaster

 def parse_t(self, t):
     """Turn one screening entry into ``(theater, start, venue)``.

     ``t`` must offer ``find``; the ``perf_date``, ``perf_time`` and
     ``perf_venue`` elements are looked up inside it via ``ct``.

     Returns ``None`` (after logging a warning) when ``self.get_theater``
     yields nothing for the venue text.  Otherwise returns a tuple of the
     theater, the value from ``self.get_screening_datetime`` localized to
     the theater's ``timezone_id``, and the stripped venue text.

     Raises ``AttributeError`` when the date or time text does not match
     its pattern, because ``re.match`` then returns ``None``.
     """
     # Raw strings keep \w, \s and \d as regex escapes; in a plain literal
     # they are invalid string escapes that newer Python versions warn about.
     date = tag_text(t.find(ct('perf_date')))
     day = re.match(r"\w+\s*(\d+)", date).group(1)
     h, m = re.match(r"(\d+):(\d+)", tag_text(t.find(ct('perf_time')))).groups()
     venue = tag_text(t.find(ct('perf_venue'))).strip()
     theater = self.get_theater(venue)
     if not theater:
         logger.warning('no theater found for %r', venue)
         return
     # Use a separate name instead of rebinding the ``t`` parameter.
     screening_dt = self.get_screening_datetime(h, m, day)
     return theater, pytz.timezone(theater.timezone_id).localize(screening_dt), venue

示例#3

0

显示文件

 def parse_t(self, t):
     """Parse a screening entry and resolve its theater and start time.

     The date, time and venue texts are read from the ``perf_date``,
     ``perf_time`` and ``perf_venue`` elements found inside ``t``.

     Returns:
         ``None`` when ``self.get_theater`` has no match for the venue
         (a warning is logged), else the tuple
         ``(theater, localized_start, venue)`` where ``localized_start``
         is the result of ``self.get_screening_datetime`` localized with
         the theater's ``timezone_id``.

     Raises:
         AttributeError: if the date or time text does not match its
             pattern (``re.match`` returns ``None`` in that case).
     """
     date = tag_text(t.find(ct('perf_date')))
     # Patterns are raw strings so the backslash classes are not treated
     # as (invalid) string escape sequences.
     day = re.match(r"\w+\s*(\d+)", date).group(1)
     time_text = tag_text(t.find(ct('perf_time')))
     h, m = re.match(r"(\d+):(\d+)", time_text).groups()
     venue = tag_text(t.find(ct('perf_venue'))).strip()
     theater = self.get_theater(venue)
     if not theater:
         logger.warning('no theater found for %r', venue)
         return
     # Keep the parameter ``t`` intact; the datetime gets its own name.
     start = self.get_screening_datetime(h, m, day)
     tz = pytz.timezone(theater.timezone_id)
     return theater, tz.localize(start), venue

示例#4

0

显示文件

    def parse_film(self, soup):
        """Extract title, directors, year and synopsis from a film page.

        The title comes from the ``h1`` inside the element with id
        ``header-one-films`` and stays falsy when that element is absent.
        Credits are read as left/right text pairs from every
        ``screening-with-credits-item`` element.

        Returns a dict with the keys ``title``, ``directors``, ``year``
        and ``synopsis``; ``year`` is ``None`` when it is missing, zero
        or not an integer.
        """
        header = soup.find(id="header-one-films")
        title = tag_text(header.find('h1')) if header else header

        # Map each credit label (left column) to its value (right column).
        props = {}
        for item in soup.findAll(ct('screening-with-credits-item')):
            label = tag_text(item.find(ct('screening-with-credits-left')))
            value = tag_text(item.find(ct('screening-with-credits-right')))
            props[label] = value

        directors = [name.strip()
                     for name in props.get('Director', '').split(',')]
        synopsis = tag_text(soup.find(ct('program-item-alternatetitle')))

        raw_year = props.get('Year')
        try:
            year = (int(raw_year) or None) if raw_year else None
        except ValueError:
            year = None

        result = {}
        result['title'] = title
        result['directors'] = filter(bool, directors)
        result['year'] = year
        result['synopsis'] = synopsis
        return result

示例#5

0

显示文件

文件： import_lff.py 项目： thuvh/filmmaster

    def parse_film(self, soup):
        """Build the film record for a parsed film page.

        Returns a dict holding ``title`` (text of the ``h1`` under the
        ``header-one-films`` element, or a falsy value if that element is
        not found), ``directors`` (non-blank comma-separated names from
        the ``Director`` credit), ``year`` (integer from the ``Year``
        credit, else ``None``) and ``synopsis``.
        """
        def _credit_pair(item):
            # Left cell is the credit label, right cell its value.
            return (
                tag_text(item.find(ct('screening-with-credits-left'))),
                tag_text(item.find(ct('screening-with-credits-right'))),
            )

        title = soup.find(id="header-one-films")
        if title:
            title = tag_text(title.find('h1'))

        credit_items = soup.findAll(ct('screening-with-credits-item'))
        props = dict(_credit_pair(item) for item in credit_items)

        director_names = props.get('Director', '').split(',')
        directors = [name.strip() for name in director_names]
        synopsis = tag_text(soup.find(ct('program-item-alternatetitle')))

        year = None
        year_text = props.get('Year')
        if year_text:
            try:
                # A parsed value of 0 is treated the same as "no year".
                year = int(year_text) or None
            except ValueError:
                year = None

        return {
            'title': title,
            'directors': filter(bool, directors),
            'year': year,
            'synopsis': synopsis,
        }

示例#6

0

显示文件

文件： import_wff.py 项目： manlan2/filmaster

    def parse_film(self, soup):
        """Parse a film page into a dict of its details.

        Keys of the returned dict: ``title`` (``h4`` of the ``ps_belka``
        element), ``title_localized`` (its ``h2``), ``directors``,
        ``times`` (``self.parse_t`` applied to each screening text),
        ``year`` and ``img_url`` (``href`` of the first ``ps_body``
        image's parent, or a falsy value when there is no image).
        """
        def _label_and_value(row_tag):
            pieces = tag_text(row_tag).split(':', 1)
            if len(pieces) != 2:
                # No colon in the row text: nothing to split into a pair.
                return None, None
            label, value = pieces
            return label.strip(), value.strip()

        title_bar = soup.find(ct('ps_belka'))
        title_pl = tag_text(title_bar.find('h2'))
        title = tag_text(title_bar.find('h4'))
        props = dict(map(_label_and_value, soup.findAll(ct('row'))))

        screening_texts = []
        screenings_box = soup.find(ct('pokazy'))
        if screenings_box:
            # Skips the first <li> -- presumably a heading; TODO confirm.
            for item in screenings_box.findAll('li')[1:]:
                screening_texts.append(tag_text(item))

        img_url = soup.find(ct('ps_body')).find('img')
        if img_url:
            img_url = img_url.parent['href']

        director_names = [
            piece.strip() for piece in props.get(u'Reżyser', '').split(',')
        ]

        film = {'title': title, 'title_localized': title_pl}
        film['directors'] = filter(bool, director_names)
        film['times'] = [self.parse_t(text) for text in screening_texts]
        film['year'] = props.get('Rok produkcji', None)
        film['img_url'] = img_url
        return film

示例#7

0

显示文件

文件： import_wff.py 项目： manlan2/filmaster

 def _row(r):
     """Split the text of ``r`` at its first colon into a stripped pair.

     Returns ``(None, None)`` when the text contains no colon.
     """
     label, colon, value = tag_text(r).partition(':')
     if not colon:
         return None, None
     return label.strip(), value.strip()

示例#8

0

显示文件

文件： import_wff.py 项目： thuvh/filmmaster

 def _row(r):
     """Return the ``(label, value)`` pair held in the text of ``r``.

     The text is cut at the first colon and both halves are stripped;
     text without a colon yields ``(None, None)``.
     """
     pieces = tag_text(r).split(':', 1)
     if len(pieces) != 2:
         return None, None
     label, value = pieces
     return label.strip(), value.strip()