示例#1
0
def get_soup(site, cssSelector=None):
    """Download ``site`` and parse the response body with BeautifulSoup.

    Args:
        site: URL string handed to ``urllib.urlopen``; it is also
            concatenated into the error message, so it must be a string.
        cssSelector: Optional selector. When it is falsy (``None``, ``""``)
            the parsed document itself is returned.

    Returns:
        The ``BeautifulSoup`` object built from the downloaded body when no
        selector is given; otherwise the result of
        ``select(soup, cssSelector)``.

    Note:
        If opening the URL raises ``IOError`` the function prints a message
        and terminates the interpreter through ``exit()`` rather than
        propagating the error.
    """
    try:
        response = urllib.urlopen(site)
    except IOError:
        # Single-argument parenthesised form prints the same text as the
        # Python 2 print statement and is also valid Python 3 syntax.
        print("error connecting to site " + site)
        exit()

    # Release the connection even when read() fails part-way through.
    try:
        html = response.read()
    finally:
        response.close()

    soup = BeautifulSoup(html)
    if not cssSelector:
        return soup
    return select(soup, cssSelector)
示例#2
0
def getLinkHrefs(content, selector):
    """Return the ``href`` value of every element matched by ``selector``.

    ``content`` is parsed with ``BeautifulSoup`` and the resulting document
    is passed to ``select`` together with ``selector``. Each item that
    ``select`` returns is indexed with ``"href"``, so an item lacking that
    key makes the lookup fail instead of being skipped.

    Returns:
        A list with one ``href`` value per matched item, in the order
        ``select`` produced them.
    """
    document = BeautifulSoup(content)
    return [anchor["href"] for anchor in select(document, selector)]
示例#3
0
def getLinkHrefs(content, selector):
    """Collect the ``href`` of each element that ``selector`` matches.

    The markup in ``content`` is parsed with ``BeautifulSoup``; ``select``
    is then called with the parsed document and ``selector``. Every matched
    item is indexed with ``"href"`` -- there is no fallback for items that
    do not carry that key.

    Returns:
        A list of ``href`` values, one per matched item, in match order.
    """
    parsed = BeautifulSoup(content)
    matches = select(parsed, selector)

    hrefs = []
    for match in matches:
        hrefs.append(match["href"])
    return hrefs