Пример #1
0
def list_albums(year, month):
    """Return the photo albums listed on the archive page of one month.

    The page is fetched from the ``ALBUMS`` URL template and scraped for
    album links. Two page layouts are handled: one where every album sits in
    a ``div`` with class ``headDiv2``, and an older one where the album links
    are the anchors of the second ``<table>`` on the page.

    Args:
        year: Value substituted for ``{year}`` in ``ALBUMS``.
        month: Value substituted for ``{month}`` in ``ALBUMS``.

    Returns:
        A list of ``{'name': ..., 'url': ...}`` dictionaries in reverse page
        order. The list is empty when the page matches neither layout.
        Names have the characters ``\\ / : * ? " < > |`` replaced by ``_``
        and leading/trailing dots removed (presumably so a name can be used
        as a directory name -- confirm against the caller).
    """
    html = http.get(ALBUMS.format(year=year, month=month)).text
    soup = BeautifulSoup(html)
    divs = soup.findAll('div', {'class': 'headDiv2'})
    if divs:
        anchors = [div.a for div in divs]
    else:
        # old version of the site
        tables = soup.findAll('table')
        if len(tables) < 2:
            # Neither layout matched: report "no albums" rather than failing
            # with a bare IndexError on tables[1].
            return []
        # Skip empty anchors and the per-album "comments" links.
        anchors = [a for a in tables[1].findAll('a')
                   if a.text and a.text != 'comments']

    invalid_chars = ['\\', '/', ':', '*', '?', '"', '<', '>', '|']
    albums = []
    for anchor in anchors:
        name = anchor.text
        for char in invalid_chars:
            name = name.replace(char, '_')
        # U+0092..U+0094 are the code points that Windows-1252 curly quotes
        # turn into when decoded as Latin-1: keep the apostrophe, drop the
        # double quotes.
        name = (name.replace(u'\x92', "'")
                    .replace(u'\x93', '')
                    .replace(u'\x94', '')
                    .strip('.'))
        albums.append({'name': name, 'url': anchor['href']})
    return albums[::-1]
Пример #2
0
def list_album_photos(url):
    """Scrape the images of the gallery page at ``url``.

    The result has the form [{'url': url, 'caption': caption}, ...].
    The first entry is the top image, whose caption is prefixed with the
    album intro text. A page without a top image yields an empty list.
    """
    page = BeautifulSoup(http.get(url).text)

    lead_div = page.find('div', {'class': 'bpImageTop'})
    if not lead_div:
        # No top image: not a gallery - probably a blog post.
        return []

    # Top image first, with the album intro text in front of its own caption.
    lead = make_image_dict(lead_div)
    intro = get_album_intro_text(page)
    lead['caption'] = u'{0} || {1}'.format(intro, lead['caption'])
    photos = [lead]

    # Remaining images, dropping every falsy make_image_dict() result.
    other_divs = page.findAll('div', {'class': 'bpBoth'})
    photos.extend(
        image for image in map(make_image_dict, other_divs) if image)
    return photos
Пример #3
0
def download_album(name, path, url):
    """Download a photo album into ``path`` unless ``path`` already exists.

    Args:
        name: Album name, used only in log messages.
        path: Target directory. Its existence is what marks the album as
            already downloaded; it is created before the first download.
        url: Album page URL, passed to ``list_album_photos``.

    Returns:
        None. Nothing is created when the page yields no photos.

    Each photo is saved as ``"<position> - <last URL segment>"``. A photo
    whose download raises ``http.DownloadError`` is logged and skipped, but
    still consumes its position number. ``write_caption`` is called for every
    saved file whose path does not end in ``.gif``.
    """
    if os.path.exists(path):
        log.info(u'Photo album "{0}" already downloaded, skipping...'.format(name))
        return

    photos = list_album_photos(url)
    if not photos:
        return

    log.info(u'Downloading: "{0}"'.format(name))
    os.makedirs(path)
    # Positions start at 1 and follow the order of the photo list.
    for i, photo in enumerate(photos, 1):
        orig_filename = photo['url'].split('/')[-1]
        file_path = os.path.join(path, '{0} - {1}'.format(i, orig_filename))
        try:
            response = http.get(photo['url'], stream=True)
        except http.DownloadError:
            # Best effort: one failed photo must not abort the album, but
            # leave a trace so the gap in the numbering can be explained.
            log.warning(u'Could not download "{0}", skipping...'.format(photo['url']))
            continue
        with open(file_path, 'wb') as f:
            shutil.copyfileobj(response.raw, f)
        if not file_path.endswith('.gif'):
            write_caption(file_path, photo['caption'])
Пример #4
0
 def test_retry_on_bad_status_code(self, mocked_requests):
     """Should retry when we get a response not in the 2xx range"""
     # Assumes mocked_requests stands in for the `requests` module that
     # http.get calls into (the patch decorator is outside this view) --
     # TODO confirm. Give the mocked response a status outside 2xx.
     mocked_requests.get.return_value.status_code = 500
     # Patch the retry hook, not http.get itself: replacing `get` and then
     # calling it only proves the mock was called, and the real function
     # under test never runs.
     with patch.object(http, 'retry_or_fail') as retry_mock:
         http.get('http://localhost')
         self.assertTrue(retry_mock.called)
Пример #5
0
 def test_retry_on_requests_exception(self, mocked_requests):
     """http.get is expected to call retry_or_fail when requests raises."""
     # NOTE(review): nothing in this method makes mocked_requests raise; the
     # exception setup presumably lives in the class-level patch -- confirm.
     retry_patch = patch.object(http, 'retry_or_fail')
     with retry_patch as retry_spy:
         http.get('http://localhost')
     # The mock keeps its call record after the patch has been undone.
     self.assertTrue(retry_spy.called)