Python BeautifulSoup示例

编程语言: Python

命名空间/包名称: b4

类/类型: BeautifulSoup

hotexamples.com的示例: 8

Python BeautifulSoup - 共找到8个示例。这些是从开源项目中提取的、评分最高的b4.BeautifulSoup真实Python示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

BeautifulSoup(5)

findAll(3)

find_all(1)

select(1)

示例#1

显示文件

文件： 27_python.py 项目： 321Riccardo/Source-Code-from-Tutorials

def get_single_item_data(item_url):
    """Print the item names and all links found on a single item page.

    Downloads ``item_url`` with ``requests``, parses the response text with
    ``BeautifulSoup`` and prints:

    * the ``.string`` of every ``<div class="i-name">`` element, then
    * the ``href`` of every ``<a>`` element, prefixed with
      ``https://buckysroom.org``.

    Args:
        item_url: URL of the page to download.
    """
    source_code = requests.get(item_url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text)
    for item_name in soup.findAll('div', {'class': 'i-name'}):
        print(item_name.string)
    for link in soup.findAll('a'):
        target = link.get('href')
        if target is None:
            # An anchor without an href attribute yields None, and
            # concatenating None to the prefix would raise TypeError.
            continue
        print("https://buckysroom.org" + target)

示例#2

显示文件

def get_single_item_data(item_url):
    """Fetch one item page and print its item names followed by its links.

    The page at ``item_url`` is downloaded with ``requests`` and parsed with
    ``BeautifulSoup``. The text of each ``<div class="i-name">`` is printed
    first; afterwards each anchor's ``href`` is printed with the
    ``https://buckysroom.org`` prefix prepended.

    Args:
        item_url: URL of the page to download.
    """
    source_code = requests.get(item_url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text)
    for item_name in soup.findAll('div', {'class': 'i-name'}):
        print(item_name.string)
    for link in soup.findAll('a'):
        path = link.get('href')
        # Skip anchors that carry no href: ``get`` returns None for them and
        # ``str + None`` would abort the whole loop with a TypeError.
        if path is not None:
            print("https://buckysroom.org" + path)

示例#3

显示文件

文件： 26_python.py 项目： 321Riccardo/Source-Code-from-Tutorials

def trade_spider(max_pages):
    """Print the URL and title of every item link on the first search pages.

    Pages ``1`` through ``max_pages`` (inclusive) of
    ``https://buckysroom.org/trade/search.php`` are downloaded one after
    another. For each ``<a class="item-name">`` on a page, the prefixed
    ``href`` and the link's ``.string`` are printed.

    Args:
        max_pages: Number of the last page to visit; nothing is fetched when
            it is smaller than 1.
    """
    page = 1
    while page <= max_pages:
        url = 'https://buckysroom.org/trade/search.php?page=' + str(page)
        source_code = requests.get(url)
        plain_text = source_code.text
        soup = BeautifulSoup(plain_text)
        for link in soup.findAll('a', {'class': 'item-name'}):
            target = link.get('href')
            if target is None:
                # No href attribute: concatenating None would raise
                # TypeError, so skip this anchor.
                continue
            href = "https://buckysroom.org" + target
            title = link.string
            print(href)
            print(title)
        page += 1

示例#4

显示文件

文件： crawler.py 项目： sarpadda/web-crawler

def trade_spider(max_pages):
    """Crawl the listing pages and process every item link found on them.

    Pages ``1`` through ``max_pages`` (inclusive) are downloaded. For each
    ``<a class="item-name">`` on a page, the link's ``href`` is appended to
    the site prefix and the resulting URL is passed to
    ``get_single_item_data``.

    Args:
        max_pages: Number of the last page to visit; nothing is fetched when
            it is smaller than 1.
    """
    page = 1
    while page <= max_pages:
        url = 'DESIRED WEBSITE' + str(page)
        source_code = requests.get(url)
        plain_text = source_code.text
        soup = BeautifulSoup(plain_text)
        for link in soup.findAll('a', {'class': 'item-name'}):
            target = link.get('href')
            if target is None:
                # No href attribute: concatenating None would raise
                # TypeError, so skip this anchor.
                continue
            get_single_item_data("DESIRED WEBSITE" + target)
        page += 1

示例#5

显示文件

def trade_spider(max_pages):
    """Print the URL and title of every item link on the first search pages.

    Pages ``1`` through ``max_pages`` (inclusive) of
    ``https://buckysroom.org/trade/search.php`` are downloaded one after
    another. For each ``<a class="item-name">`` on a page, the prefixed
    ``href`` and the link's ``.string`` are printed.

    Args:
        max_pages: Number of the last page to visit; nothing is fetched when
            it is smaller than 1.
    """
    page = 1
    # The bound must be the ``max_pages`` parameter; the previous spelling
    # ``max_page`` was an undefined name and raised NameError on every call.
    while page <= max_pages:
        url = 'https://buckysroom.org/trade/search.php?page=' + str(page)
        source_code = requests.get(url)
        plain_text = source_code.text
        soup = BeautifulSoup(plain_text)
        for link in soup.findAll('a', {'class': 'item-name'}):
            target = link.get('href')
            if target is None:
                # No href attribute: concatenating None would raise
                # TypeError, so skip this anchor.
                continue
            href = "https://buckysroom.org" + target
            title = link.string
            print(href)
            print(title)
        page += 1

示例#6

显示文件

文件： random_composer.py 项目： rickh94/random_composer

def get_composer(number):
    """Print ``number`` randomly picked composer names scraped from Wikipedia.

    The "List of classical music composers by era" page is downloaded and
    the ``title`` attribute of every ``<area>`` element inside a
    ``<div class="timeline-wrapper">`` is collected as a composer name.

    Args:
        number: How many names to draw and print.

    Raises:
        SystemExit: With status 1 when the page download is not successful.
    """
    response = requests.get(
        "https://en.wikipedia.org/wiki/List_of_classical_music_composers_by_era"
    )
    if not response.ok:
        print("Failed to download list of composers")
        raise SystemExit(1)

    page = BeautifulSoup(response.text, "html.parser")
    names = [
        area['title']
        for wrapper in page.find_all("div", class_="timeline-wrapper")
        for area in wrapper.find_all("area")
    ]

    # random.choices draws with replacement, so a name may appear twice.
    for picked in random.choices(names, k=number):
        print(picked)

示例#7

显示文件

文件： test.py 项目： parrot88/ama_scrape

# -*- coding: utf-8 -*-
from b4 import BeautifulSoup
#from b4 import BeautifulSoup
import urllib2
#import os, re, urlparse

# Fetch a page and print every anchor whose href starts with "http://".
# NOTE: this script uses Python 2 syntax (print statements, urllib2).

# Alternative target kept for manual testing:
#Site = 'https://www.google.co.jp'
Site = 'http://www.yahoo.co.jp/'

# Parse the opened URL with the "lxml" parser.
soup = BeautifulSoup(urllib2.urlopen(Site), "lxml")
# Earlier experiments, left disabled:
#res = soup.find_all("a")
#res = soup.a.get("href")
# CSS attribute selector: <a> elements whose href begins with "http://".
res = soup.select('a[href^="http://"]')

# Print each matching element.
for one in res:
    print one

#from pprint import pprint
#pprint(txt)
# Marker printed once all matches have been output.
print 'Finish'

示例#8

显示文件

import requests
from b4 import BeautifulSoup

# Download the page and print the text of every row of the table nested
# inside another table under the element with id "content".
url = "http://kurstenge.kz"
soup = BeautifulSoup(requests.get(url).text, "html.parser")
for table_row in soup.select("#content table table tr"):
    print(table_row.text)