Python BeautifulSoup примеры использования

Язык программирования: Python

Пространство имен/Пакет: py2k

Класс/Тип: BeautifulSoup

Примеров на hotexamples.com: 12

Python BeautifulSoup - 12 примеров найдено. Это лучшие примеры Python кода для py2k.BeautifulSoup, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

BeautifulSoup(7)

find(2)

find_all(1)

prettify(1)

Пример #1

Показать файл

Файл: cljdocs.py Проект: deflexor/ClojureDoc-Search

def parse_doc(url):
    """Parsing the documentation."""
    v = content_request(url)
    soup = BeautifulSoup(v)
    stuff = soup.find("div", "docstring").find("pre")
    l = []
    if not stuff:
        ret = " \n" + "No documentation available!\n"
        l.append(ret.split("\n"))
        return l
    ret = "Documentation: \n" + stuff.text
    l.append(ret.split("\n"))
    return l

Пример #2

Показать файл

Файл: cljdocs.py Проект: deflexor/ClojureDoc-Search

def parse_source(url):
    """Parsing source"""
    v = content_request(url)
    soup = BeautifulSoup(v)
    stuff = soup.find("div", "source_content")
    l = []
    if not stuff:
        ret = " \n" + "No source code available!\n"
        l.append(ret.split("\n"))
        return l
    stuff = stuff.find("pre", "brush: clojure")
    ret = "Source:        \n" + stuff.text
    l.append(ret.split("\n"))
    return l

Пример #3

Показать файл

Файл: cljdocs.py Проект: Foxboron/ClojureDoc-Search

def parse_source(url):
    """Parsing source"""
    v = content_request(url)
    soup = BeautifulSoup(v)
    stuff = soup.find("div", "source_content")
    l = []
    if not stuff:
        ret = " \n" + "No source code available!\n"
        l.append(ret.split("\n"))
        return l
    stuff = stuff.find("pre", "brush: clojure")
    ret = "Source:        \n" + stuff.text
    l.append(ret.split("\n"))
    return l

Пример #4

Показать файл

Файл: diagnose.py Проект: Foxboron/ClojureDoc-Search

def diagnose(data):
    """Diagnostic suite for isolating common problems."""
    print "Diagnostic running on Beautiful Soup %s" % __version__
    print "Python version %s" % sys.version

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    for name in basic_parsers:
        for builder in builder_registry.builders:
            if name in builder.features:
                break
        else:
            basic_parsers.remove(name)
            print(
                "I noticed that %s is not installed. Installing it may help." %
                name)

    if 'lxml' in basic_parsers:
        basic_parsers.append(["lxml", "xml"])
        from lxml import etree
        print "Found lxml version %s" % ".".join(map(str, etree.LXML_VERSION))

    if 'html5lib' in basic_parsers:
        import html5lib
        print "Found html5lib version %s" % html5lib.__version__

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print '"%s" looks like a filename. Reading data from the file.' % data
        data = open(data).read()
    elif data.startswith("http:") or data.startswith("https:"):
        print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
        print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
        return
    print

    for parser in basic_parsers:
        print "Trying to parse your markup with %s" % parser
        success = False
        try:
            soup = BeautifulSoup(data, parser)
            success = True
        except Exception, e:
            print "%s could not parse the markup." % parser
            traceback.print_exc()
        if success:
            print "Here's what %s did with the markup:" % parser
            print soup.prettify()

        print "-" * 80

Пример #5

Показать файл

Файл: diagnose.py Проект: Foxboron/ClojureDoc-Search

def diagnose(data):
    """Diagnostic suite for isolating common problems."""
    print "Diagnostic running on Beautiful Soup %s" % __version__
    print "Python version %s" % sys.version

    basic_parsers = ["html.parser", "html5lib", "lxml"]
    for name in basic_parsers:
        for builder in builder_registry.builders:
            if name in builder.features:
                break
        else:
            basic_parsers.remove(name)
            print (
                "I noticed that %s is not installed. Installing it may help." %
                name)

    if 'lxml' in basic_parsers:
        basic_parsers.append(["lxml", "xml"])
        from lxml import etree
        print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))

    if 'html5lib' in basic_parsers:
        import html5lib
        print "Found html5lib version %s" % html5lib.__version__

    if hasattr(data, 'read'):
        data = data.read()
    elif os.path.exists(data):
        print '"%s" looks like a filename. Reading data from the file.' % data
        data = open(data).read()
    elif data.startswith("http:") or data.startswith("https:"):
        print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data
        print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup."
        return
    print

    for parser in basic_parsers:
        print "Trying to parse your markup with %s" % parser
        success = False
        try:
            soup = BeautifulSoup(data, parser)
            success = True
        except Exception, e:
            print "%s could not parse the markup." % parser
            traceback.print_exc()
        if success:
            print "Here's what %s did with the markup:" % parser
            print soup.prettify()

        print "-" * 80

Пример #6

Показать файл

Файл: cljdocs.py Проект: Foxboron/ClojureDoc-Search

def parse_doc(url):
    """Parsing the documentation."""
    v = content_request(url)
    soup = BeautifulSoup(v)
    stuff = soup.find("div", "doc").find("div", "content")
    for e in stuff.findAll("br"):
        e.replace_with("\n")
    l = []
    if not stuff:
        ret = " \n" + "No documentation available!\n"
        l.append(ret.split("\n"))
        return l
    ret = "Documentation: \n" + stuff.text
    l.append(ret.split("\n"))
    return l

Пример #7

Показать файл

Файл: cljdocs.py Проект: Foxboron/ClojureDoc-Search

def parse_doc(url):
    """Parsing the documentation."""
    v = content_request(url)
    soup = BeautifulSoup(v)
    stuff = soup.find("div", "doc").find("div", "content")
    for e in stuff.findAll("br"):
        e.replace_with("\n")
    l = []
    if not stuff:
        ret = " \n" + "No documentation available!\n"
        l.append(ret.split("\n"))
        return l
    ret = "Documentation: \n" + stuff.text
    l.append(ret.split("\n"))
    return l

Пример #8

Показать файл

Файл: cljdocs.py Проект: deflexor/ClojureDoc-Search

def parse_example(url):
    """Parsing examples"""
    v = content_request(url)
    soup = BeautifulSoup(v)
    stuff = soup.find_all("div", "hidden plain_content")
    l = []
    if not stuff:
        ret = " \n" + "No examples available!\n"
        l.append(ret.split("\n"))
        return l
    num = 1
    for i in stuff:
        ret = "Example #" + str(num) + ":    \n" + i.text.rstrip("\n")
        l.append(ret.split("\n"))
        num += 1
    return l

Пример #9

Показать файл

Файл: cljdocs.py Проект: Foxboron/ClojureDoc-Search

def parse_example(url):
    """Parsing examples"""
    v = content_request(url)
    soup = BeautifulSoup(v)
    stuff = soup.find_all("div", "hidden plain_content")
    l = []
    if not stuff:
        ret = " \n" + "No examples available!\n"
        l.append(ret.split("\n"))
        return l
    num = 1
    for i in stuff:
        ret = "Example #" + str(num) + ":    \n" + i.text.rstrip("\n")
        l.append(ret.split("\n"))
        num += 1
    return l

Пример #10

Показать файл

Файл: cljdocs.py Проект: Foxboron/ClojureDoc-Search

def seealso_search(url):
    """Searches the 'See Also...' part. """
    v = content_request(url)
    soup = BeautifulSoup(v)
    stuff = soup.body.find_all("li", "see_also_item")
    items = []
    sites = []
    for i in stuff:
        item, web = new_parse(i)
        items.append(item)
        sites.append(web)
    return (items, sites)

Пример #11

Показать файл

Файл: cljdocs.py Проект: Foxboron/ClojureDoc-Search

def bs4_parse(var):
    """Parses out the indvidual search item"""
    var = request(var)
    soup = BeautifulSoup(var)
    stuff = soup.body.find_all("div", "search_result")
    items = []
    sites = []
    for i in stuff:
        item, web = parse_list(i)
        items.append(item)
        sites.append(web)
    return (items, sites)

Пример #12

Показать файл

Файл: diagnose.py Проект: Foxboron/ClojureDoc-Search

def benchmark_parsers(num_elements=100000):
    """Very basic head-to-head performance benchmark."""
    print "Comparative parser benchmark on Beautiful Soup %s" % __version__
    data = rdoc(num_elements)
    print "Generated a large invalid HTML document (%d bytes)." % len(data)

    for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
        success = False
        try:
            a = time.time()
            soup = BeautifulSoup(data, parser)
            b = time.time()
            success = True
        except Exception, e:
            print "%s could not parse the markup." % parser
            traceback.print_exc()
        if success:
            print "BS4+%s parsed the markup in %.2fs." % (parser, b - a)