def _do_download(self, url, save_path): """ Attempts to retrieve the image from the remote URL. """ opener = urllib2.build_opener(DefaultErrorHandler()) request = urllib2.Request(url) filename = url.rsplit('/', 1)[-1] filename = "%s%s" % (save_path, filename) try: image_file = open(filename, "r") except IOError: try: datastream = opener.open(request) except urllib2.URLError, e: log.error("[ERROR] %s <%s>" % (e, url)) datastream = None pass except httplib.BadStatusLine, e: log.error("[ERROR] %s <%s>" % (e, url)) datastream = None pass
option_list = BaseCommand.option_list + ( make_option( '-f', '--forum', action='store', type="string", nargs=1, dest='forum', help='If used, will force a hard refresh and re-download of all images.' ), ) def handle(self, *args, **options): """ Wraps the store_images method. """ log.info("Export script initialised.") forum = options.get('forum') if forum: try: module = __import__('zetaboardsbackup.forum.backends.%s' % forum, fromlist=['EXPORTER']) klass = getattr(module, 'EXPORTER') except (ImportError, AttributeError), e: raise ImproperlyConfigured("%s: check your --forum flag, are you sure that backend exists?" % e) else: backend = klass() backend.export() else: log.error("The --forum flag (-f) was not provided. Please provide the backend you wish to export to.") log.info("Complete.")
type="string", nargs=1, dest='forum', help='If used, will force a hard refresh and re-download of all images.' ), ) def handle(self, *args, **options): """ Wraps the store_images method. """ log.info("Export script initialised.") forum = options.get('forum') if forum: try: module = __import__('zetaboardsbackup.forum.backends.%s' % forum, fromlist=['EXPORTER']) klass = getattr(module, 'EXPORTER') except (ImportError, AttributeError), e: raise ImproperlyConfigured( "%s: check your --forum flag, are you sure that backend exists?" % e) else: backend = klass() backend.export() else: log.error( "The --forum flag (-f) was not provided. Please provide the backend you wish to export to." ) log.info("Complete.")
class ImageUrlDownloader(object): """ Handles all related this related to downloading image urls. """ def download(self, queryset, fields, save_path=IMAGE_SAVE_PATH): """ Takes in a Django queryset and an iterative of field names as strings to search for image URLs to download. """ data_to_search = [] urls_to_download = [] # For every item in the fieldset, check the fields for # data to add to our of things to search for urls. for item in queryset: for field in fields: text = getattr(item, field) if text: data_to_search.append(text) # Extract all possible image urls from the data. for text in data_to_search: urls = self._extract_image_urls(text) if urls: log.debug("Found URLs: %s" % urls) urls_to_download += urls # Attempt to download and save all the URLs found. for url in urls_to_download[::-1]: self._do_download(url, save_path) def _extract_image_urls(self, text): """ Given a chunk of text, returns a list of all matched URIs that point to images. """ image_urls = [] # Break up adjacent bbtags. text = text.replace('][', '] [') url_re = re.compile( "(https?\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(?:\/\S*)?(?:[a-zA-Z0-9_])+\.(?:jpg|jpeg|gif|png|bmp|svg))", re.IGNORECASE) matches = url_re.findall(text) if matches: image_urls += matches return image_urls def _do_download(self, url, save_path): """ Attempts to retrieve the image from the remote URL. """ opener = urllib2.build_opener(DefaultErrorHandler()) request = urllib2.Request(url) filename = url.rsplit('/', 1)[-1] filename = "%s%s" % (save_path, filename) try: image_file = open(filename, "r") except IOError: try: datastream = opener.open(request) except urllib2.URLError, e: log.error("[ERROR] %s <%s>" % (e, url)) datastream = None pass except httplib.BadStatusLine, e: log.error("[ERROR] %s <%s>" % (e, url)) datastream = None pass if datastream: if hasattr(datastream, 'status'): log.error("[ERROR: HTTP %s] %s" % (datastream.status, url)) else: image_file = open(filename, "wb") image_file.write(datastream.read()) image_file.close() log.info("[SUCCESS] Image downloaded and saved. <%s>" % url)