示例#1
0
def downloadXkcd(start_comic, end_comic, dest_dir='D:\\Ambiente de Trabalho\\pasta'):
    """Download xkcd comic images for a half-open range of comic numbers.

    For every number in ``range(start_comic, end_comic)`` the comic page
    ``https://xkcd.com/<number>`` is fetched, the first ``#comic img``
    element is looked up, and the image it references is saved into
    ``dest_dir`` under the image's own file name.  Pages without such an
    element are reported on stdout and skipped.

    Args:
        start_comic: First comic number to download (inclusive).
        end_comic: Comic number to stop before (exclusive).
        dest_dir: Directory the image files are written into.  It is not
            created here, so it has to exist already.

    Raises:
        Whatever ``raise_for_status()`` raises when a page or image request
        comes back with a non-OK HTTP status, and ``OSError`` if the target
        file cannot be opened for writing.
    """
    # Local import so this function does not depend on the file's import block.
    from urllib.parse import urljoin

    # One session for the whole run (instead of one per request); the
    # ``with`` block closes it when done or when an exception escapes.
    with HTMLSession() as session:
        for url_number in range(start_comic, end_comic):
            print(f'{url_number}')
            page = session.get(f'https://xkcd.com/{url_number}')
            page.raise_for_status()

            # With first=True a failed lookup yields None, not an empty list.
            comic = page.html.find('#comic img', first=True)
            if comic is None:
                print('Could not find comic image.')
                continue

            # The src is normally protocol-relative ("//imgs.xkcd.com/...");
            # urljoin resolves that, and also copes with absolute or
            # site-relative values, which plain 'https:' + src would mangle.
            comic_url = urljoin(page.url, comic.attrs['src'])
            image = session.get(comic_url)
            image.raise_for_status()

            image_path = os.path.join(dest_dir, os.path.basename(comic_url))
            with open(image_path, 'wb') as image_file:
                image_file.write(image.content)
def fetch_ads(url='http://tankeogteknikk.no/qmedia/oslo.php'):
    """Fetch the current ads from the tankeogteknikk web site.

    The page at ``url`` is requested, every ``table.sub`` element on it is
    handed to ``_parse_sub_advert``, and each resulting ad has its
    ``'image'`` entry replaced by an absolute, percent-quoted URL.

    Args:
        url: Address of the page listing the ads; also used as the base
            when resolving each ad's image location.

    Returns:
        A list with one parsed ad per ``table.sub`` element, or an empty
        list when the request itself fails with ``RequestError`` (the
        failure is logged).

    Raises:
        Whatever ``raise_for_status()`` raises when the server answers with
        a 404 or another non-OK HTTP status.
    """
    try:
        response = HTMLSession().get(url)
    except RequestError:
        logger.exception('failed to fetch ads')
        return []

    # A non-OK HTTP status is deliberately not swallowed: it propagates.
    response.raise_for_status()

    adverts = list(map(_parse_sub_advert, response.html.find('table.sub')))
    for advert in adverts:
        # Make the image location absolute first, then quote it while
        # leaving '/' and ':' intact so the URL structure survives.
        absolute_image = parse.urljoin(url, advert['image'])
        advert['image'] = parse.quote(absolute_image, safe='/:')
    return adverts