예제 #1
0
from zerospider import fetch

def save_to_disk(html, url, title):
    """Write a fetched page to a file named after its title.

    The file name is ``title`` with every ``-`` replaced by a space; it is
    opened relative to the current working directory and overwritten if it
    already exists.

    Args:
        html: Page markup. Text is encoded as UTF-8 before writing; a value
            that is already ``bytes`` is written unchanged.
        url: Address of the page. Not used by this function.
        title: String the output file name is derived from.
    """
    fname = title.replace('-', ' ')
    # The payload is bytes, so the file must be opened in binary mode:
    # a text-mode file rejects bytes on Python 3 with TypeError.
    data = html if isinstance(html, bytes) else html.encode('utf-8')
    with open(fname, 'wb') as f:
        f.write(data)

# Run the crawl for bpetrushev.appspot.com, seeded with the '/t' path.
# save_to_disk is handed over as the processor, '/t/<string:title>' as the
# only save rule, and 'bpetrushev.status' as the status path.
fetch(
    domain='bpetrushev.appspot.com',
    seed=('/t',),
    save_rules=['/t/<string:title>'],
    processor=save_to_disk,
    status_path='bpetrushev.status',
)
예제 #2
0
from zerospider import fetch
from lxml.html import fromstring
import requests


def save_to_disk(html, url, comic_id):
    """Download the comic image referenced by a page and store it on disk.

    The image is located with the CSS selector ``div#comic img[src]``; the
    first match is downloaded and written to ``xkcd/<comic_id>.jpg``. Pages
    without a matching image are skipped. The ``xkcd`` directory must
    already exist, because the file is opened directly for writing.

    Args:
        html: Page markup as text; it is UTF-8 encoded before parsing.
        url: Address of the page. Not used by this function.
        comic_id: Integer used to build the output file name.
    """
    doc = fromstring(html.encode("utf-8"))
    images = doc.cssselect("div#comic img[src]")
    if not images:
        # No image on this page: indexing [0] would raise IndexError.
        return
    img_src = images[0].attrib["src"]
    if img_src.startswith("//"):
        # Scheme-relative URL; requests refuses URLs without a scheme
        # (MissingSchema), so make it absolute before downloading.
        img_src = "http:" + img_src
    img_content = requests.get(img_src).content
    # NOTE(review): the extension is always .jpg, whatever the real image
    # type is; kept as-is so existing output paths do not change.
    fname = "xkcd/%d.jpg" % (comic_id)
    with open(fname, "wb") as f:
        f.write(img_content)


# Keyword arguments for the www.xkcd.com crawl, collected in one place and
# unpacked into fetch() below.
xkcd_crawl = {
    "domain": "www.xkcd.com",
    "seed": ("/",),
    "save_rules": ["/<int:comic_id>/"],  # e.g. http://xkcd.com/513
    "processor": save_to_disk,
    "crawlers": 10,
    "status_path": "xkcd/status",
}
fetch(**xkcd_crawl)