Пример #1
0
 def parse_detail(self, id, html):
     """Parse a series detail page into a dict of seasons and episodes.

     Args:
         id: Identifier of the page being parsed. Not used by this method.
         html: Markup of the detail page.

     Returns:
         A dict with two keys:
           'title'   - text of the first element matching ``h1 a``.
           'seasons' - list of dicts, each with 'number' (int) and
                       'episodes' (list of dicts with 'epid', 'season',
                       'number', 'production_number', 'air_date' formatted
                       as YYYY-MM-DD, and 'title').

     Raises:
         IndexError: if ``h1 a`` or ``#eplist pre`` matches nothing
             (assuming ``select`` returns a list -- confirm against helper).
         ValueError: if an air date does not match ``%d/%b/%y``.
     """
     # Raw strings keep the backslash escapes intact for the regex engine;
     # compiling here avoids re-resolving the patterns for every line.
     season_header = re.compile(r'Season (\d+)')
     episode_line = re.compile(
         r'^\s*(\d+)\s+(\d+)-(\d+)\s+(\w+)\s+(\d{2}\/\w{3}\/\d{2})\s+<a.*?>(.*?)<\/a>')

     soup = BeautifulSoup(html)
     series = {}
     series['title'] = select(soup, 'h1 a')[0].text

     pre_text = unicode(select(soup, '#eplist pre')[0])
     seasons = []
     # Sections are bullet-delimited; the chunk before the first bullet
     # is dropped.
     for season_html in pre_text.split('•')[1:]:
         header = season_header.search(season_html)
         if header is None:
             # A section without a "Season N" heading cannot be numbered;
             # skip it rather than aborting the whole page.
             continue
         episodes = []
         # Ignore the first line of the section (the "Season" line).
         for episode_html in season_html.split('\n')[1:]:
             m = episode_line.match(episode_html)
             if m is None:
                 continue
             epid, season_no, number, production_number, aired, title = m.groups()
             # NOTE(review): %b depends on the active locale and %y on the
             # library's two-digit-year pivot -- confirm both suit the site.
             air_date = strptime(aired, '%d/%b/%y')
             episodes.append({
                 'epid': int(epid, 10),
                 'season': int(season_no, 10),
                 'number': int(number, 10),
                 'production_number': production_number,
                 'air_date': strftime('%Y-%m-%d', air_date),
                 'title': title,
             })
         seasons.append({
             'number': int(header.group(1)),
             'episodes': episodes,
         })
     series['seasons'] = seasons
     return series
Пример #2
0
 def parse_list(self, html):
     """Parse a result-listing page into a list of estate dicts.

     Every row of the ``.result-liste`` table except the first is read as
     one estate. Each row is expected to have at least five ``td`` cells
     and at least one link.

     Args:
         html: Markup of the listing page.

     Returns:
         A list of dicts. Each dict always has 'id', 'estate_type',
         'zip_code' and 'city'; 'price', 'area' and 'bedrooms' are written
         through ``conditional_set`` (presumably only when a value was
         parsed -- confirm against that helper).

     Raises:
         IndexError: if a row has fewer than five cells or no link
             (assuming ``select`` returns a list -- confirm against helper).
     """
     soup = BeautifulSoup(html)
     estates = []
     # The first row is skipped (presumably the table header).
     for estate_row in select(soup, '.result-liste tr')[1:]:
         estate = {}
         cells = select(estate_row, 'td')
         # Cell 0: price
         conditional_set(estate, 'price', parse_price(cells[0].string))
         # Cell 1: type + link (the link is the first anchor found in the row)
         estate_link = select(estate_row, 'a')[0]['href']
         estate['id'] = parse_immoweb_link(estate_link)
         estate['estate_type'] = parse_estate_type(deep_contents(cells[1]))
         # Cell 2: area
         conditional_set(estate, 'area', parse_first_number(cells[2].string))
         # Cell 3: bedrooms
         conditional_set(estate, 'bedrooms', parse_first_number(cells[3].string))
         # Cell 4: zip_code + city, both parsed from the same cell text
         city_text = cells[4].string
         estate['zip_code'] = parse_zip_code(city_text)
         estate['city'] = parse_city(city_text)
         estates.append(estate)
     return estates
Пример #3
0
    def parse_detail(self, id, html):
        """Parse a song page into its artist, title and weighted tag list.

        Args:
            id: Indexable pair; ``id[0]`` is stored as the artist and
                ``id[1]`` as the title.
            html: Markup of the song page.

        Returns:
            A dict with 'artist', 'title' and 'tags'. 'tags' is a list of
            ``(weight, text)`` tuples, one per ``#tagcloud .cloudItem``
            element. The weight is the pixel value of the element's
            ``font-size`` style, or -1 when no such value is present.
        """
        song = {}
        song['artist'] = id[0]
        song['title'] = id[1]

        # last.fm puts some strange tag in its code that caused an error when it was left in.
        endash = unichr(0x2013)
        html = unicode(html.replace('<!%s[if IE]><![endif]%s>' % (endash, endash), ''))

        soup = BeautifulSoup(html)
        font_size = re.compile(r'font-size: (\d+)px')
        tags = []
        for cloud_item in select(soup, '#tagcloud .cloudItem'):
            # An element without a style attribute gets the same -1 weight
            # as one whose style carries no font size, instead of raising
            # KeyError.
            m = font_size.search(cloud_item.get('style', ''))
            weight = int(m.group(1)) if m is not None else -1
            tags.append((weight, cloud_item.text))
        song['tags'] = tags
        return song