Example #1
    def loadConfig(self, configPath):
        "Initialize the state of the fairy from XML config files"
        self.configPath = os.path.join(
            os.path.dirname(os.path.abspath(sys.argv[0])), configPath)
        printAndLog("Merge Fairy loading configuration file " +
                    self.configPath)

        soup = BeautifulSoup.BeautifulSOAP(open(self.configPath))
        dom = soup.first("fairy")

        for attr in ["urlbase", "pathbase", "buildcmd"]:
            self.__dict__[attr] = dom[attr]
        self.pollinterval = int(float(dom["pollinterval"]))

        self.branches = []
        self.branchMap = {}  # map between URLs and branch objects
        for branchNode in dom.fetch("branch"):
            branch = SvnBranch(self.buildcmd,
                               urlbase=self.urlbase,
                               pathbase=self.pathbase).initFromXml(branchNode)
            self.branches.append(branch)
            self.branchMap[branch.url] = branch
        self.branches.sort(key=lambda b: b.url)

        self.dependencyMap = DependencyMap()
        for dependencyNode in dom.fetch("dependency"):
            dependency = Dependency().initFromXml(dependencyNode, self)
            self.dependencyMap.addDependency(dependency)
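For context, a minimal sketch of the configuration file this method expects, inferred only from the reads above; the values are invented, and the branch/dependency attributes consumed by initFromXml are not shown:

import BeautifulSoup

SAMPLE_CONFIG = """
<fairy urlbase="http://svn.example.com/repos"
       pathbase="/var/fairy/checkouts"
       buildcmd="make all"
       pollinterval="60">
  <branch/>
  <dependency/>
</fairy>
"""

soup = BeautifulSoup.BeautifulSOAP(SAMPLE_CONFIG)
dom = soup.first("fairy")
print dom["pollinterval"]       # attribute lookups return strings, hence int(float(...)) above
print len(dom.fetch("branch"))  # 1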
Example #2
import urllib2
import BeautifulSoup as bs


def rpm_list(urllink):
    """Return the href of every anchor on a directory-index page."""
    download_list = []
    html_doc = urllib2.urlopen(urllink)
    html_bs = bs.BeautifulSOAP(html_doc)
    for i in html_bs.findAll('a'):
        # was i.attrs[0][1]; href is not guaranteed to be the first attribute
        download_list.append(i['href'])
    return download_list[1:]  # skip the first link (typically the parent directory)
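A hypothetical call against a package mirror's directory index (the URL is an assumption; any server-generated listing whose first anchor is the parent-directory link fits the [1:] slice):

if __name__ == '__main__':
    for href in rpm_list('http://mirror.example.com/packages/'):
        print href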
Example #3
    def _reddit(self, args):
        """Usage: `{cmd_prefix}reddit [*subreddits]`"""
        output = []
        args = args if args else ['']
        for arg in args:
            if arg:
                site = 'http://www.reddit.com/r/{}'.format(arg)
                logger.log((site, ), (None, ))
            else:
                site = 'http://www.reddit.com/'
            # Cap the download at ~20 MB before parsing.
            html = urlgrabber.urlread(site, size=2097152 * 10)
            output.extend(BeautifulSoup.BeautifulSOAP(html).findAll('a', 'title'))
        return '\n'.join('{}: {} {}'.format(i + 1, o.string, o.get('href'))
                         for i, o in enumerate(output[:5]))
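In BS3, a string passed as the second argument to findAll filters on CSS class, so findAll('a', 'title') matches anchors with class="title". A standalone sketch of the same scrape outside the bot (the selector reflects reddit's old server-rendered markup, an assumption here):

import urlgrabber
import BeautifulSoup

html = urlgrabber.urlread('http://www.reddit.com/', size=2097152 * 10)
for i, a in enumerate(BeautifulSoup.BeautifulSOAP(html).findAll('a', 'title')[:5]):
    print '{}: {} {}'.format(i + 1, a.string, a.get('href'))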
Example #4
        def parse_resp(content):
            params = []

            soup = BeautifulSoup.BeautifulSOAP(content)
            form = soup.find('form')

            form_attr_dict = dict(form.attrs)
            for e in form.findAll('input'):
                e_attr_dict = dict(e.attrs)
                params.append((e_attr_dict['name'], e_attr_dict['value']))

            return (form_attr_dict['action'],
                    form_attr_dict['method'].upper(),
                    dict(params))
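A self-contained check of parse_resp against a toy form, with the helper lifted to module scope (the markup and values are invented for illustration):

import BeautifulSoup

SAMPLE_FORM = """
<form action="/login" method="post">
  <input name="user" value="alice"/>
  <input name="csrf" value="abc123"/>
</form>
"""

action, method, params = parse_resp(SAMPLE_FORM)
# action == '/login', method == 'POST',
# params == {'user': 'alice', 'csrf': 'abc123'}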
Example #5
def GetBlogHistory(url, direction):
    global totalnum
    global currentnum

    if currentnum > 15:
        print "sleep 30 seconds before continuing retrieving new instances"
        time.sleep(
            30
        )  #sleep 30seconds after retrieved 20 instances, way to avoid block from site for too frequent access...
        currentnum = 0
    currentnum += 1

    try:
        driver.get(url)
    except Exception:
        if printtimeoutexcept == 1:
            print "retry for timeout(20sec) exception, url: " + url
        GetBlogHistory(url, direction)
        return  # don't fall through and parse a page that never loaded
    content = driver.page_source
    soup = BeautifulSoup.BeautifulSOAP(content)
    if direction == 0 or direction == -1:
        llist = soup.find("div", attrs={"class": "pleft thide"})
        if llist is not None:
            lurl = dict(llist.contents[0].attrs)['href']
            if printblognum == 1:
                totalnum += 1
                print "%d. %s %s" % (totalnum, lurl,
                                     html_parser.unescape(
                                         dict(llist.attrs)['a']))
            else:
                print lurl + '  ' + html_parser.unescape(
                    dict(llist.attrs)['a'])
            GetBlogHistory(lurl, -1)
    if direction == 0 or direction == 1:
        rlist = soup.find("div", attrs={"class": "pright thide"})
        if rlist is not None:
            rurl = dict(rlist.contents[0].attrs)['href']
            if printblognum == 1:
                totalnum += 1
                print "%d. %s %s" % (totalnum, rurl,
                                     html_parser.unescape(
                                         dict(rlist.attrs)['a']))
            else:
                print rurl + '  ' + html_parser.unescape(
                    dict(rlist.attrs)['a'])
            GetBlogHistory(rurl, 1)
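Both the timeout retry and the left/right traversal recurse, so a long blog history can blow past Python's default recursion limit of about 1000 frames. A minimal iterative sketch of the same walk, assuming the same driver global and dropping the printing and retry details:

def walk_history(start_url):
    stack = [(start_url, 0)]
    while stack:
        url, direction = stack.pop()
        driver.get(url)
        soup = BeautifulSoup.BeautifulSOAP(driver.page_source)
        if direction <= 0:  # follow the "previous" link
            left = soup.find("div", attrs={"class": "pleft thide"})
            if left is not None:
                stack.append((dict(left.contents[0].attrs)['href'], -1))
        if direction >= 0:  # follow the "next" link
            right = soup.find("div", attrs={"class": "pright thide"})
            if right is not None:
                stack.append((dict(right.contents[0].attrs)['href'], 1))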
Example #6
def parse_full(xml):
    '''Given the XML returned from a comp search, try to interpret
    the returned data from the search query.
    '''
    soup = BeautifulSoup.BeautifulSOAP(xml)
    data = soup.find('data')
    if not data:
        raise ParseException('unable to find data element')

    totals = data.find('totals')
    results = data.find('results')
    if not totals:
        raise ParseException('unable to find totals element')
    if not results:
        raise ParseException('unable to find results element')

    # totals
    #
    by_category = {}
    for tab in totals.findAll('tab'):
        table_name = tab.find('table').text
        count = int(tab.find('total').text)
        by_category[table_name] = count

    # results: since we're not exactly sure what's coming back (other than by
    # inspecting the category the query was made with), let's be permissive
    # in handling whatever shows up.
    #
    result_rows = []
    for child in results.childGenerator():
        if not isinstance(child, BeautifulSoup.Tag):
            continue
        result_rows.append(_soupdict(child))

    return {
        'totals': by_category,
        'rows': result_rows,
    }
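A toy payload matching the structure parse_full walks; the element names come straight from the find calls above, while the values (and the flattening done by the unshown _soupdict helper) are illustrative assumptions:

SAMPLE_XML = """
<data>
  <totals>
    <tab><table>people</table><total>2</total></tab>
  </totals>
  <results>
    <row><id>1</id></row>
    <row><id>2</id></row>
  </results>
</data>
"""

result = parse_full(SAMPLE_XML)
# result['totals'] == {'people': 2}
# result['rows'] is a list of two dicts, one per <row>, built by _soupdict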