Пример #1
0
def parseintodict(file):
    """Group the links found in an article file by link type.

    Reads the whole file at path ``file``, passes its contents to
    ``parsers.links_in_article`` and collects the ``(type, html, link)``
    triples that call yields.

    Args:
        file: Path of the file to read (passed straight to ``open``).

    Returns:
        A dict mapping each link type to a list of ``(html, link)``
        tuples, in the order the parser produced them.
    """
    # The context manager closes the handle even if read() raises.
    with open(file) as handle:
        data = handle.read()
    links = {}
    for kind, html, link in parsers.links_in_article(data):
        # setdefault replaces dict.has_key(), which no longer exists in
        # Python 3, and avoids a second lookup.
        links.setdefault(kind, []).append((html, link))
    return links
Пример #2
0
import urllib

import parsers

# Fetch one article page and print every link tuple the parser finds in it.
# NOTE(review): urllib.urlopen is the Python 2 API; this script still targets
# Python 2 even though print(...) below also parses on Python 3.
response = urllib.urlopen("http://www.nhs.uk/news/2010/03March/Pages/Impotence-is-a-warning-sign-for-heart-risk.aspx")
try:
    # Read the full body up front so the connection can be released
    # before parsing starts.
    page_html = response.read()
finally:
    response.close()
for tag in parsers.links_in_article(page_html):
    print(tag)
Пример #3
0
 def scrape(self):
     """Extract links from this object's original HTML and record them.

     Passes the result of ``self.original_html()`` to
     ``parsers.links_in_article``, hands what that returns to
     ``self.update_links`` and then returns ``self.scraped``.
     """
     # NOTE(review): what ``scraped`` holds is not visible from this
     # method; presumably update_links() maintains it -- confirm.
     article_links = parsers.links_in_article(self.original_html())
     self.update_links(article_links)
     return self.scraped