def list_albums(year, month):
    """Return the photo albums listed on the page for a given month.

    The page at ``ALBUMS`` (formatted with ``year`` and ``month``) is fetched
    and parsed. Two layouts are handled:

    * current layout: one ``<div class="headDiv2">`` per album, whose first
      anchor carries the album name and link;
    * old layout: every anchor in the second ``<table>`` of the page that
      has text other than ``'comments'``.

    Album names are made safe to use as directory names: the characters
    ``\\ / : * ? " < > |`` become ``_``, ``\\x92`` becomes an apostrophe,
    ``\\x93``/``\\x94`` are dropped, and leading/trailing dots are stripped.

    Args:
        year: value substituted for ``{year}`` in ``ALBUMS``.
        month: value substituted for ``{month}`` in ``ALBUMS``.

    Returns:
        A list of ``{'name': ..., 'url': ...}`` dictionaries, in the reverse
        of the order in which they appear on the page. The list is empty
        when the page contains no album links in either layout.
    """
    html = http.get(ALBUMS.format(year=year, month=month)).text
    soup = BeautifulSoup(html)

    divs = soup.findAll('div', {'class': 'headDiv2'})
    if divs:
        anchors = [div.a for div in divs]
    else:
        # Old version of the site: the links sit in the second table.
        # A page with fewer than two tables simply has no albums to list,
        # so do not index blindly.
        tables = soup.findAll('table')
        if len(tables) > 1:
            anchors = [a for a in tables[1].findAll('a')
                       if a.text and a.text != 'comments']
        else:
            anchors = []

    invalid_chars = ['\\', '/', ':', '*', '?', '"', '<', '>', '|']
    retval = []
    for anchor in anchors:
        name = anchor.text
        for char in invalid_chars:
            name = name.replace(char, '_')
        # \x92-\x94 are Windows-1252 "smart quote" bytes that leaked into
        # the text; normalise them after the invalid characters are handled.
        name = name.replace(u'\x92', "'")\
                   .replace(u'\x93', '')\
                   .replace(u'\x94', '')\
                   .strip('.')
        retval.append({'name': name, 'url': anchor['href']})

    return retval[::-1]
def list_album_photos(url):
    """Return the images found on an album page.

    The page at ``url`` is fetched and parsed. The top image
    (``<div class="bpImageTop">``) comes first, with the album intro text
    prepended to its caption as ``u'<intro> || <caption>'``. It is followed
    by every ``<div class="bpBoth">`` image, in page order.

    Args:
        url: address of the album page.

    Returns:
        A list of dictionaries in the form
        ``[{'url': url, 'caption': caption}, ...]`` as produced by
        ``make_image_dict``. The list is empty when the page has no top
        image div (not a gallery - probably a blog post).
    """
    html = http.get(url).text
    soup = BeautifulSoup(html)

    top_image_div = soup.find('div', {'class': 'bpImageTop'})
    if not top_image_div:
        # Not a gallery - probably a blog post.
        return []

    retval = []

    # Process top image, prepending album intro text to caption.
    # make_image_dict results are treated as possibly falsy for the other
    # images below, so apply the same guard here instead of subscripting an
    # unusable result.
    entry = make_image_dict(top_image_div)
    if entry:
        intro_text = get_album_intro_text(soup)
        entry['caption'] = u'{0} || {1}'.format(
            intro_text, entry['caption'])
        retval.append(entry)

    # Process rest of images, skipping divs that yield no usable image.
    for div in soup.findAll('div', {'class': 'bpBoth'}):
        image = make_image_dict(div)
        if image:
            retval.append(image)

    return retval
def download_album(name, path, url):
    """Download a photo album into ``path`` unless ``path`` already exists.

    When ``path`` exists the album is considered downloaded and only a log
    message is emitted. Otherwise the album page at ``url`` is listed; if it
    yields no photos nothing is created. Each photo is saved as
    ``'<position> - <original filename>'`` where position is the 1-based
    index of the photo in the album, so a photo that fails to download
    leaves a gap in the numbering. Captions are written with
    ``write_caption`` for every file whose name does not end in ``.gif``.

    Args:
        name: album name, used only in log messages.
        path: directory to create and fill with the album's photos.
        url: address of the album page.
    """
    if os.path.exists(path):
        log.info(u'Photo album "{0}" already downloaded, skipping...'.format(name))
        return

    photos = list_album_photos(url)
    if not photos:
        return

    log.info(u'Downloading: "{0}"'.format(name))
    os.makedirs(path)

    for i, photo in enumerate(photos, 1):
        orig_filename = photo['url'].split('/')[-1]
        file_path = os.path.join(path, '{0} - {1}'.format(i, orig_filename))

        try:
            response = http.get(photo['url'], stream=True)
        except http.DownloadError:
            # Best effort: one bad photo must not abort the album, but say
            # which one was skipped.
            log.warning(u'Could not download "{0}", skipping'.format(photo['url']))
            continue

        # NOTE(review): assumes http.get returns a requests-style response
        # (it exposes .raw); a streamed response should be closed so its
        # connection is released even if writing the file fails.
        try:
            with open(file_path, 'wb') as f:
                shutil.copyfileobj(response.raw, f)
        finally:
            response.close()

        if not file_path.endswith('.gif'):
            write_caption(file_path, photo['caption'])
def test_retry_on_bad_status_code(self, mocked_requests):
    """A response outside the 2xx range should lead to a retry."""
    # NOTE(review): ``http.get`` itself is replaced by the mock here, so the
    # call below goes to the mock and the assertion only shows that the mock
    # was invoked. ``mocked_requests`` is not used in the body; presumably
    # it is injected by a patch decorator outside this view - confirm.
    with patch.object(http, 'get') as get_mock:
        http.get('http://localhost')

    self.assertTrue(get_mock.called)
def test_retry_on_requests_exception(self, mocked_requests):
    """An exception raised by requests should lead to a retry."""
    # ``retry_or_fail`` is swapped for a mock so the test can observe whether
    # ``http.get`` reaches it.
    # NOTE(review): nothing in this body makes requests raise; presumably
    # ``mocked_requests`` is configured by a decorator outside this view -
    # confirm.
    with patch.object(http, 'retry_or_fail') as retry_or_fail_mock:
        http.get('http://localhost')

    self.assertTrue(retry_or_fail_mock.called)