def _scrape_media(url, autoplay=False, maxwidth=600, force=False,
                  use_cache=False, max_cache_age=None):
    """Return the media for ``url``, from the cache or by scraping it.

    Args:
        url: address handed to ``MediaByURL`` and ``Scraper.for_url``.
        autoplay: coerced to ``bool``; forwarded to the scraper and to
            every ``MediaByURL`` call.
        maxwidth: coerced to ``int``; forwarded to every ``MediaByURL``
            call.
        force: when true, the cache is not consulted and the url is
            always scraped.
        use_cache: when true, read from and write to ``MediaByURL`` and
            record scrape errors there.
        max_cache_age: forwarded unchanged to ``MediaByURL.get``.

    Returns:
        The cached media, a newly built ``Media``, or ``None`` when the
        scrape raised ``HTTPError`` or ``URLError``.
    """
    autoplay = bool(autoplay)
    maxwidth = int(maxwidth)

    # Consult the cache first unless a forced rescrape was requested.
    cached_entry = None
    if use_cache and not force:
        cached_entry = MediaByURL.get(
            url,
            autoplay=autoplay,
            maxwidth=maxwidth,
            max_cache_age=max_cache_age,
        )
    media = cached_entry.media if cached_entry else None

    # Nothing usable in the cache: scrape the url.
    if not media:
        thumbnail_url = thumbnail_size = None
        scraper = Scraper.for_url(url, autoplay=autoplay)

        try:
            thumbnail_image, media_object, secure_media_object = (
                scraper.scrape())
        except (HTTPError, URLError) as err:
            # Record the failure (when caching) and give up on this url.
            if use_cache:
                MediaByURL.add_error(
                    url,
                    str(err),
                    autoplay=autoplay,
                    maxwidth=maxwidth,
                )
            return None

        # The scraper must be able to build an embed from the media
        # objects it just returned; any it cannot handle are nulled out
        # to protect downstream code.
        # (Parenthesised single-argument print emits the same text as the
        # Python 2 print statement.)
        if media_object and not scraper.media_embed(media_object):
            print("%s made a bad media obj for url %s" % (scraper, url))
            media_object = None

        if (secure_media_object
                and not scraper.media_embed(secure_media_object)):
            print("%s made a bad secure media obj for url %s" % (scraper, url))
            secure_media_object = None

        if thumbnail_image:
            thumbnail_size = thumbnail_image.size
            thumbnail_url = upload_media(thumbnail_image)

        media = Media(
            media_object,
            secure_media_object,
            thumbnail_url,
            thumbnail_size,
        )

    # Store the media in the cache (if requested), possibly extending the
    # ttl; the ERROR_MEDIA sentinel is never written back.
    if use_cache and media is not ERROR_MEDIA:
        MediaByURL.add(url, media, autoplay=autoplay, maxwidth=maxwidth)

    return media
def _scrape_media(url, autoplay=False, maxwidth=600, force=False,
                  save_thumbnail=True, use_cache=False, max_cache_age=None):
    """Return the media for ``url``, from the cache or by scraping it.

    Args:
        url: address handed to ``MediaByURL`` and ``Scraper.for_url``.
        autoplay: coerced to ``bool``; forwarded to the scraper and to
            every ``MediaByURL`` call.
        maxwidth: coerced to ``int``; forwarded to every ``MediaByURL``
            call.
        force: when true, the cache is not consulted and the url is
            always scraped.
        save_thumbnail: when false, the scraped thumbnail is not uploaded
            and the result is not written to the cache.
        use_cache: when true, read from ``MediaByURL``, record scrape
            errors there and (if ``save_thumbnail``) store the result.
        max_cache_age: forwarded unchanged to ``MediaByURL.get``.

    Returns:
        The cached media, a newly built ``Media``, or ``None`` when the
        scrape raised ``HTTPError`` or ``URLError``.
    """
    autoplay = bool(autoplay)
    maxwidth = int(maxwidth)

    # Consult the cache first unless a forced rescrape was requested.
    cached_entry = None
    if use_cache and not force:
        cached_entry = MediaByURL.get(
            url,
            autoplay=autoplay,
            maxwidth=maxwidth,
            max_cache_age=max_cache_age,
        )
    media = cached_entry.media if cached_entry else None

    # Nothing usable in the cache: scrape the url.
    if not media:
        thumbnail_url = thumbnail_size = None
        scraper = Scraper.for_url(url, autoplay=autoplay)

        try:
            thumbnail_image, media_object, secure_media_object = (
                scraper.scrape())
        except (HTTPError, URLError) as err:
            # Record the failure (when caching) and give up on this url.
            if use_cache:
                MediaByURL.add_error(
                    url,
                    str(err),
                    autoplay=autoplay,
                    maxwidth=maxwidth,
                )
            return None

        # The scraper must be able to build an embed from the media
        # objects it just returned; any it cannot handle are nulled out
        # to protect downstream code.
        # (Parenthesised single-argument print emits the same text as the
        # Python 2 print statement.)
        if media_object and not scraper.media_embed(media_object):
            print("%s made a bad media obj for url %s" % (scraper, url))
            media_object = None

        if (secure_media_object
                and not scraper.media_embed(secure_media_object)):
            print("%s made a bad secure media obj for url %s" % (scraper, url))
            secure_media_object = None

        # The thumbnail is only uploaded when the caller wants it kept.
        if thumbnail_image and save_thumbnail:
            thumbnail_size = thumbnail_image.size
            thumbnail_url = upload_media(thumbnail_image)

        media = Media(
            media_object,
            secure_media_object,
            thumbnail_url,
            thumbnail_size,
        )

    # Store the media in the cache (if requested), possibly extending the
    # ttl. A scrape done without saving the thumbnail is partial and is
    # not cached; the ERROR_MEDIA sentinel is never written back.
    if use_cache and save_thumbnail and media is not ERROR_MEDIA:
        MediaByURL.add(url, media, autoplay=autoplay, maxwidth=maxwidth)

    return media
def _scrape_media(url, autoplay=False, maxwidth=600, force=False,
                  save_thumbnail=True, use_cache=False, max_cache_age=None,
                  use_youtube_scraper=False):
    """Return the media for ``url``, from the cache or by scraping it.

    A cached entry is only used as-is when it has a ``thumbnail_url``;
    otherwise the url is scraped again.

    Args:
        url: address handed to ``MediaByURL`` and to the scrapers.
        autoplay: coerced to ``bool``; forwarded to ``Scraper.for_url``
            and to every ``MediaByURL`` call.
        maxwidth: coerced to ``int``; forwarded to every ``MediaByURL``
            call.
        force: when true, the cache is not consulted and the url is
            always scraped.
        save_thumbnail: when false, the scraped thumbnail is not uploaded
            and the result is not written to the cache.
        use_cache: when true, read from ``MediaByURL``, record scrape
            errors there and store cacheable results.
        max_cache_age: forwarded unchanged to ``MediaByURL.get``.
        use_youtube_scraper: forwarded unchanged to ``Scraper.for_url``.

    Returns:
        The cached media, a newly built ``Media``, or ``None`` when the
        first scrape raised ``HTTPError`` or ``URLError``.
    """
    autoplay = bool(autoplay)
    maxwidth = int(maxwidth)

    # Writing the result back needs both flags; the scrape below can
    # still veto it.
    store_result = use_cache and save_thumbnail

    # Consult the cache first unless a forced rescrape was requested.
    cached_entry = None
    if use_cache and not force:
        cached_entry = MediaByURL.get(
            url,
            autoplay=autoplay,
            maxwidth=maxwidth,
            max_cache_age=max_cache_age,
        )
    media = cached_entry.media if cached_entry else None

    # Scrape when there is no cached media or it lacks a thumbnail.
    if not (media and media.thumbnail_url):
        thumbnail_url = thumbnail_size = None
        scraper = Scraper.for_url(
            url,
            autoplay=autoplay,
            use_youtube_scraper=use_youtube_scraper,
        )

        try:
            (thumbnail_image, preview_object,
             media_object, secure_media_object) = scraper.scrape()
        except (HTTPError, URLError) as err:
            # Record the failure (when caching) and give up on this url.
            if use_cache:
                MediaByURL.add_error(
                    url,
                    str(err),
                    autoplay=autoplay,
                    maxwidth=maxwidth,
                )
            return None

        # The scraper must be able to build an embed from the media
        # objects it just returned; any it cannot handle are nulled out
        # to protect downstream code.
        # (Parenthesised single-argument print emits the same text as the
        # Python 2 print statement.)
        if media_object and not scraper.media_embed(media_object):
            print("%s made a bad media obj for url %s" % (scraper, url))
            media_object = None

        if (secure_media_object
                and not scraper.media_embed(secure_media_object)):
            print("%s made a bad secure media obj for url %s" % (scraper, url))
            secure_media_object = None

        # If thumbnail can't be found, attempt again using
        # _ThumbnailOnlyScraper. This should fix bugs that occur when
        # embed.ly caches links before the thumbnail is available.
        if not (thumbnail_image
                or isinstance(scraper, _ThumbnailOnlyScraper)):
            scraper = _ThumbnailOnlyScraper(url)
            try:
                thumbnail_image, preview_object, _, _ = scraper.scrape()
            except (HTTPError, URLError):
                # The retry failed: keep the first scrape's results but
                # do not cache them.
                store_result = False

        if thumbnail_image and save_thumbnail:
            thumbnail_size = thumbnail_image.size
            thumbnail_url = upload_media(thumbnail_image)
        else:
            # don't cache if thumbnail is absent
            store_result = False

        media = Media(
            media_object,
            secure_media_object,
            preview_object,
            thumbnail_url,
            thumbnail_size,
        )

    if store_result and media is not ERROR_MEDIA:
        # Store the media in the cache, possibly extending the ttl
        MediaByURL.add(url, media, autoplay=autoplay, maxwidth=maxwidth)

    return media
def _scrape_media(url, autoplay=False, maxwidth=600, force=False,
                  save_thumbnail=True, use_cache=False, max_cache_age=None):
    """Return the media for ``url``, from the cache or by scraping it.

    Args:
        url: address handed to ``MediaByURL`` and to the scrapers.
        autoplay: coerced to ``bool``; forwarded to ``Scraper.for_url``
            and to every ``MediaByURL`` call.
        maxwidth: coerced to ``int``; forwarded to every ``MediaByURL``
            call.
        force: when true, the cache is not consulted and the url is
            always scraped.
        save_thumbnail: when false, the scraped thumbnail is not uploaded
            and the result is not written to the cache.
        use_cache: when true, read from ``MediaByURL``, record scrape
            errors there and store cacheable results.
        max_cache_age: forwarded unchanged to ``MediaByURL.get``.

    Returns:
        The cached media, a newly built ``Media``, or ``None`` when the
        first scrape raised ``HTTPError`` or ``URLError``.
    """
    autoplay = bool(autoplay)
    maxwidth = int(maxwidth)

    # Writing the result back needs both flags; the scrape below can
    # still veto it.
    store_result = use_cache and save_thumbnail

    # Consult the cache first unless a forced rescrape was requested.
    cached_entry = None
    if use_cache and not force:
        cached_entry = MediaByURL.get(
            url,
            autoplay=autoplay,
            maxwidth=maxwidth,
            max_cache_age=max_cache_age,
        )
    media = cached_entry.media if cached_entry else None

    # Nothing usable in the cache: scrape the url.
    if not media:
        thumbnail_url = thumbnail_size = None
        scraper = Scraper.for_url(url, autoplay=autoplay)

        try:
            (thumbnail_image, preview_object,
             media_object, secure_media_object) = scraper.scrape()
        except (HTTPError, URLError) as err:
            # Record the failure (when caching) and give up on this url.
            if use_cache:
                MediaByURL.add_error(
                    url,
                    str(err),
                    autoplay=autoplay,
                    maxwidth=maxwidth,
                )
            return None

        # The scraper must be able to build an embed from the media
        # objects it just returned; any it cannot handle are nulled out
        # to protect downstream code.
        # (Parenthesised single-argument print emits the same text as the
        # Python 2 print statement.)
        if media_object and not scraper.media_embed(media_object):
            print("%s made a bad media obj for url %s" % (scraper, url))
            media_object = None

        if (secure_media_object
                and not scraper.media_embed(secure_media_object)):
            print("%s made a bad secure media obj for url %s" % (scraper, url))
            secure_media_object = None

        # If thumbnail can't be found, attempt again using
        # _ThumbnailOnlyScraper. This should fix bugs that occur when
        # embed.ly caches links before the thumbnail is available.
        if not (thumbnail_image
                or isinstance(scraper, _ThumbnailOnlyScraper)):
            scraper = _ThumbnailOnlyScraper(url)
            try:
                thumbnail_image, preview_object, _, _ = scraper.scrape()
            except (HTTPError, URLError):
                # The retry failed: keep the first scrape's results but
                # do not cache them.
                store_result = False

        if thumbnail_image and save_thumbnail:
            thumbnail_size = thumbnail_image.size
            thumbnail_url = upload_media(thumbnail_image)
        else:
            # don't cache if thumbnail is absent
            store_result = False

        media = Media(
            media_object,
            secure_media_object,
            preview_object,
            thumbnail_url,
            thumbnail_size,
        )

    if store_result and media is not ERROR_MEDIA:
        # Store the media in the cache, possibly extending the ttl
        MediaByURL.add(url, media, autoplay=autoplay, maxwidth=maxwidth)

    return media