def get_soup(site, cssSelector=None):
    """Fetch ``site`` and parse the response body with BeautifulSoup.

    Args:
        site: Location string handed to ``urllib.urlopen``; it is also
            concatenated into the error message, so it must be a string.
        cssSelector: Optional selector. When it is truthy, the parsed
            document is passed through ``select`` and that result is
            returned instead of the whole document.

    Returns:
        The ``BeautifulSoup`` object built from the response body when
        ``cssSelector`` is falsy, otherwise whatever
        ``select(soup, cssSelector)`` returns.

    Raises:
        SystemExit: If opening ``site`` raises ``IOError``. The error
            message is printed first and the exit status is 1.
    """
    try:
        f = urllib.urlopen(site)
    except IOError:
        # The single-argument parenthesised form prints the same text
        # whether ``print`` is a statement or a function.
        print("error connecting to site " + site)
        # ``exit()`` is an interactive helper injected by the ``site``
        # module and is not guaranteed to exist; raising SystemExit directly
        # always works and reports the failure with a non-zero status.
        raise SystemExit(1)

    # Close the handle even if reading the body fails part-way through.
    try:
        html = f.read()
    finally:
        f.close()

    soup = BeautifulSoup(html)
    if not cssSelector:
        return soup
    return select(soup, cssSelector)
def getLinkHrefs(content, selector):
    """Return the ``href`` value of each element of ``content`` matching ``selector``.

    Args:
        content: Markup handed to ``BeautifulSoup`` for parsing.
        selector: Selector passed to ``select`` to pick the elements
            whose ``href`` is wanted.

    Returns:
        A list with ``element["href"]`` for every element returned by
        ``select``, in the order ``select`` yields them.

    Raises:
        Whatever ``element["href"]`` raises for a matched element that has
        no ``href`` (presumably ``KeyError`` -- confirm against the
        BeautifulSoup version in use).
    """
    soup = BeautifulSoup(content)
    # A list comprehension always produces a real list, whereas ``map`` is
    # lazy on Python 3; on Python 2 the result is the same list as before.
    return [link["href"] for link in select(soup, selector)]