def parseintodict(file):
    """Group the links found in a saved article by their type.

    The whole of ``file`` is read and handed to
    ``parsers.links_in_article``, whose result is iterated as
    ``(type, html, link)`` triples.

    Args:
        file: Name of the file to read; passed straight to ``open``.

    Returns:
        A dict mapping each type to a list of ``(html, link)`` tuples, kept
        in the order the parser produced them.
    """
    # Close the handle deterministically instead of leaving it to the GC.
    with open(file) as handle:
        data = handle.read()

    links = {}
    # Named ``link_type`` so the ``type`` builtin is not shadowed.
    for link_type, html, link in parsers.links_in_article(data):
        # setdefault replaces the has_key() check; has_key() no longer
        # exists on Python 3 dicts.
        links.setdefault(link_type, []).append((html, link))
    return links
import urllib

import parsers

# Article page whose HTML is fed to parsers.links_in_article below.
ARTICLE_URL = (
    "http://www.nhs.uk/news/2010/03March/Pages/"
    "Impotence-is-a-warning-sign-for-heart-risk.aspx"
)

# NOTE: urllib.urlopen is the Python 2 spelling; on Python 3 the equivalent
# function lives at urllib.request.urlopen.
response = urllib.urlopen(ARTICLE_URL)
try:
    # Print every item the parser yields, one per line.
    for tag in parsers.links_in_article(response.read()):
        # With a single argument the parenthesised form prints the same
        # thing under the Python 2 print statement and is valid Python 3.
        print(tag)
finally:
    # Release the connection even if parsing or printing raises.
    response.close()
def scrape(self):
    """Extract links from the original HTML and record them on this object.

    The result of ``self.original_html()`` is passed to
    ``parsers.links_in_article`` and whatever that returns is given to
    ``self.update_links``.

    Returns:
        The value of ``self.scraped`` after ``update_links`` has run.
    """
    article_html = self.original_html()
    found_links = parsers.links_in_article(article_html)
    self.update_links(found_links)
    return self.scraped