Exemplos de BeautifulSoup em Python

Linguagem de programação: Python

Espaço de nomes / Nome do pacote: resources.lib.external.BeautifulSoup

Classe / Tipo: BeautifulSoup

Exemplos em hotexamples.com: 4

BeautifulSoup em Python - 4 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de resources.lib.external.BeautifulSoup.BeautifulSoup em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Métodos Frequentes

Exibir Ocultar

BeautifulSoup(4)

find(2)

findAll(2)

renderContents(1)

Métodos Frequentes

BeautifulSoup (4)

find (2)

findAll (2)

renderContents (1)

Relacionados

Pintracker

TextTable

loadMap

findTransitionState

get_postprocessor_human_name

ensure_app_dirs_exists

is_arbitrator_required

grid_walk

schema

unescape

Relacionados em outras linguagens

TextPropertyItem (PHP)

ilCSVWriter (PHP)

SharedMemoryMapping (C#)

SafeCTLHandleContext (C#)

tty_cell (C++)

MusicPlayerIsPlaying (C++)

NewRangeQuery (Go)

Intent (Go)

I18N (Java)

OFBadPropertyCodeSerializerVer14 (Java)

Exemplo n.º 1

0

Exibir arquivo

Arquivo: scraper.py Projeto: SleepyyNet/xbmc_gomtv

def fetch_list(self, cat_id, page=1):
    """Scrape one page of a category's video listing.

    Downloads ``self.video_list_url % (cat_id, page)``, walks every
    ``<td class="listOff">`` cell and, for each video found, downloads its
    thumbnail to ``<self.base_data_path>/<cat_id>/<video id>.tbn`` unless
    that file already exists.

    Args:
        cat_id: Category identifier; substituted into the list URL and used
            as the thumbnail sub-directory name.
        page: Page number substituted into the list URL.

    Returns:
        A dict keyed by video id (the link's ``href`` with ``'./'``
        removed).  Each value is a dict with the keys ``id``,
        ``date_string``, ``posted_date``, ``year``, ``title``,
        ``description`` and ``image_url`` (the *local* thumbnail path).
        Cells without a ``vodlink`` anchor, with a ``javascript`` link, or
        without a recognisable "Posted:" date are skipped.
    """
    vids = {}

    response = urllib2.urlopen(self.video_list_url % (cat_id, page))
    try:
        contents = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    soup = BeautifulSoup(contents)
    # Loop-invariant: every thumbnail of this category goes to one directory.
    thumb_dir = os.path.join(self.base_data_path, cat_id)

    for cell in soup.findAll('td', 'listOff'):
        link = cell.find('a', 'vodlink')
        if link is None:
            # Not a video cell; nothing to extract.
            continue

        vid_id = link['href'].replace('./', '')
        if vid_id.startswith('javascript'):
            continue

        # The posting date lives in a sibling cell of the same table row.
        sect = cell.parent.find('td', 'sect')
        if sect is None:
            continue
        posted = re.search(r'Posted: (\d+) (\d+)/(\d+)<', sect.renderContents())
        if posted is None:
            continue
        year, month, day = posted.groups()

        date_string = "%s-%s-%s" % (day, month, year)
        posted_date = datetime.date(
            int(year), int(month), int(day)).strftime(self.date_format)

        image_url = 'http://www.gomtv.net' + cell.find('img')['src']
        local_image_path = os.path.join(thumb_dir, str(vid_id) + '.tbn')
        if not os.path.exists(thumb_dir):
            os.makedirs(thumb_dir)
        if not os.path.isfile(local_image_path):
            # Only download thumbnails that are not cached yet.
            urllib.urlretrieve(image_url, local_image_path)

        vids[vid_id] = {
            'id': int(vid_id),
            'date_string': date_string,
            'posted_date': posted_date,
            'year': year,
            'title': str(link.string),
            'description': str(cell.find('div', 'vodinfo').renderContents()),
            'image_url': local_image_path,
        }

    return vids

Exemplo n.º 2

0

Exibir arquivo

Arquivo: scraper.py Projeto: SleepyyNet/xbmc_gomtv

def fetch_page_count(self, cat_id):
    """Return the number of the last listing page of a category.

    Downloads page 1 of ``self.video_list_url`` for ``cat_id`` and reads the
    page number out of the "Last >>" link inside the ``<table id="bbsnum">``
    element.

    Args:
        cat_id: Category identifier substituted into the list URL.

    Returns:
        The page number captured from the "Last >>" link, as a string
        (the regex group is returned unconverted).  ``0`` when the table
        or the "Last >>" link is not present.
    """
    response = urllib2.urlopen(self.video_list_url % (cat_id, 1))
    try:
        contents = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    link_table = BeautifulSoup(contents).find('table', {'id': 'bbsnum'})
    if link_table is None:
        return 0

    last_link = re.search(
        r"<a href=\"\./\?page=(\d+)&[^>]*>Last >></a>",
        link_table.renderContents())
    if last_link is None:
        # NOTE(review): presumably a listing without a "Last >>" link has no
        # further pages; 0 is the default the code initialised - confirm
        # callers treat it as intended.
        return 0

    return last_link.group(1)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: scraper.py Projeto: SleepyyNet/xbmc_gomtv

def fetch_video(self, cat_id, vid_id):
    """Look up the title and stream URL of a single video.

    Downloads ``self.video_url % (cat_id, vid_id)`` and searches the
    rendered page for a ``.swf?link=<digits>`` reference.  When one is
    found, the directory ``<self.base_data_path>/<cat_id>`` is created if
    it does not exist yet.

    Args:
        cat_id: Category identifier; substituted into the video URL and used
            as the local sub-directory name.
        vid_id: Video identifier; substituted into the video URL and used as
            the local file name stem.

    Returns:
        ``None`` when the page contains no ``.swf?link=`` reference,
        otherwise a dict with the keys ``title``, ``file_url`` and
        ``local_vid_path``.
    """
    response = urllib2.urlopen(self.video_url % (cat_id, vid_id))
    try:
        contents = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    soup = BeautifulSoup(contents)
    swf_link = re.search(r'\.swf\?link=(\d+)', soup.renderContents())
    if swf_link is None:
        return None

    file_id = swf_link.group(1)
    # The title is read first so a page without the detail block fails
    # before any directory is created.
    title = soup.find('div', {'id': 'bbsDetail'}).h3.string

    video_dir = os.path.join(self.base_data_path, cat_id)
    if not os.path.exists(video_dir):
        os.makedirs(video_dir)

    return {
        'title': title,
        'file_url': 'http://flvdn.gomtv.net/viewer/%s.flv' % file_id,
        'local_vid_path': os.path.join(video_dir, str(vid_id) + '.flv'),
    }

Exemplo n.º 4

0

Exibir arquivo

Arquivo: scraper.py Projeto: SleepyyNet/xbmc_gomtv

def fetch(self):
    """Scrape the list of categories (shows) from the category page.

    Downloads ``self.category_url`` and inspects every ``<dl>`` inside each
    ``<div id="Channels">`` element.  A ``<dl>`` contributes a category only
    when its ``<dt>`` contains a link.

    Returns:
        A dict keyed by category id (the link's ``href`` with the
        surrounding slashes removed).  Each value is a dict with the keys
        ``id``, ``title``, ``description`` and ``image_url``.
        ``description`` is ``''`` when the entry has no
        ``<dd class="txt">`` element.
    """
    cats = {}

    response = urllib2.urlopen(self.category_url)
    try:
        contents = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    soup = BeautifulSoup(contents)
    for channel in soup.findAll('div', {'id': 'Channels'}):
        for show in channel.findAll('dl'):
            heading = show.dt
            link = heading.a if heading is not None else None
            if link is None:
                # Entries without a link cannot be addressed; skip them.
                continue

            cat_id = re.sub(r'/(.*)/', r'\1', link['href'])

            image_url = show.find('dd', 'img').img['src']
            if image_url.startswith('/'):
                # Site-relative path: prefix the host to make it absolute.
                image_url = 'http://www.gomtv.net' + image_url

            # Look the description element up once instead of twice.
            text_cell = show.find('dd', 'txt')
            if text_cell is not None:
                description = text_cell.renderContents()
            else:
                description = ''

            cats[cat_id] = {
                'id': cat_id,
                'title': link.string,
                'description': description,
                'image_url': image_url,
            }

    return cats