def get_results(self):
    """Yield importable Video objects for this search.

    Scraped results are loaded (up to a ~20s budget) and cached under the
    form's cache key; on a cache hit the scraping phase is skipped entirely.
    Only results that yield an embed code or a file URL are surfaced.
    """
    key = self._get_cache_key()
    loaded = cache.get(key)
    if loaded is None:
        deadline = time.time() + 20
        raw_results = auto_search(
            self.cleaned_data["query"],
            order_by=self.cleaned_data["order_by"],
            api_keys=API_KEYS,
        )
        loaded = []
        for candidate in intersperse_results(raw_results, 40):
            try:
                candidate.load()
            except VidscraperError:
                # Expected scraper failures are silently dropped.
                pass
            except Exception:
                logging.error("error while loading search result: %r",
                              candidate.url, exc_info=True)
            else:
                loaded.append(candidate)
            if time.time() > deadline:
                break # don't take forever!
        cache.set(key, loaded)
    for candidate in loaded:
        video = Video.from_vidscraper_video(candidate, commit=False)
        if video.embed_code or video.file_url:
            yield video
def get_results(self):
    """Generate Video instances for the current search query.

    The loaded vidscraper results are memoized in the cache; loading stops
    once a 20-second deadline passes. Videos without either an embed code
    or a file URL are filtered out before being yielded.
    """
    cache_key = self._get_cache_key()
    results = cache.get(cache_key)
    if results is None:
        stop_at = time.time() + 20
        search_iter = auto_search(self.cleaned_data['query'],
                                  order_by=self.cleaned_data['order_by'],
                                  api_keys=API_KEYS)
        results = []
        for item in intersperse_results(search_iter, 40):
            try:
                item.load()
            except VidscraperError:
                pass  # known scraper failure: skip quietly
            except Exception:
                logging.error('error while loading search result: %r',
                              item.url, exc_info=True)
            else:
                results.append(item)
            if time.time() > stop_at:
                break # don't take forever!
        cache.set(cache_key, results)
    for item in results:
        candidate = Video.from_vidscraper_video(item, commit=False)
        if candidate.embed_code or candidate.file_url:
            yield candidate
def video_from_vidscraper_video(vidscraper_video, site_pk, import_app_label=None, import_model=None, import_pk=None, status=None, author_pks=None, category_pks=None, clear_rejected=False, using='default'): import_class = get_model(import_app_label, import_model) try: source_import = import_class.objects.using(using).get( pk=import_pk, status=import_class.STARTED) except import_class.DoesNotExist: logging.warn('Retrying %r: expected %s instance (pk=%r) missing.', vidscraper_video.url, import_class.__name__, import_pk) video_from_vidscraper_video.retry() try: try: vidscraper_video.load() except Exception: source_import.handle_error( ('Skipped %r: Could not load video data.' % vidscraper_video.url), using=using, is_skip=True, with_exception=True) return if category_pks: categories = Category.objects.using(using).filter( pk__in=category_pks) else: categories = None if author_pks: authors = User.objects.using(using).filter(pk__in=author_pks) else: authors = None video = Video.from_vidscraper_video(vidscraper_video, status=status, using=using, source_import=source_import, authors=authors, categories=categories, site_pk=site_pk, commit=False, update_index=False) try: video.full_clean() except ValidationError, e: source_import.handle_error( ("Skipping %r: %r" % (vidscraper_video.url, e.message_dict)), is_skip=True, using=using) return else:
def video_from_vidscraper_video(video_dict, site_pk, import_app_label=None, import_model=None, import_pk=None, status=None, author_pks=None, category_pks=None, clear_rejected=False): vidscraper_video = VidscraperVideo.deserialize(video_dict, API_KEYS) import_class = get_model(import_app_label, import_model) try: source_import = import_class.objects.get( pk=import_pk, status=import_class.STARTED) except import_class.DoesNotExist: logging.warn('Retrying %r: expected %s instance (pk=%r) missing.', vidscraper_video.url, import_class.__name__, import_pk) video_from_vidscraper_video.retry() try: try: vidscraper_video.load() except Exception: source_import.handle_error( ('Skipped %r: Could not load video data.' % vidscraper_video.url), is_skip=True, with_exception=True) return if category_pks: categories = Category.objects.filter(pk__in=category_pks) else: categories = None if author_pks: authors = User.objects.filter(pk__in=author_pks) else: authors = None video = Video.from_vidscraper_video(vidscraper_video, status=status, source_import=source_import, authors=authors, categories=categories, site_pk=site_pk, commit=False, update_index=False) try: video.clean_fields() # If clear_rejected is True, we've already deleted any rejected # videos, so there's no need to explicitly exclude them. # If clear_rejected is False, this is not the first run, and # so rejected videos need to not be excluded in this check. video._check_for_duplicates(exclude_rejected=False) video.validate_unique() except ValidationError, e: source_import.handle_error(("Skipping %r: %r" % ( vidscraper_video.url, e.message)), is_skip=True) return else:
def get_results(self):
    """Yield Video objects for the search in ``cleaned_data['q']``.

    Scraping is bounded by a 20-second budget and the loaded results are
    cached; only videos exposing an embed code or file URL are yielded.
    """
    cache_key = self._get_cache_key()
    hits = cache.get(cache_key)
    if hits is None:
        time_limit = time.time() + 20
        scraped = auto_search(self.cleaned_data['q'],
                              order_by=self.cleaned_data['order_by'],
                              api_keys=self.get_search_api_keys())
        hits = []
        for entry in intersperse_results(scraped, 40):
            try:
                entry.load()
            except VidscraperError:
                # Scraper-level failures are expected; drop the entry.
                pass
            else:
                hits.append(entry)
            if time.time() > time_limit:
                break # don't take forever!
        cache.set(cache_key, hits)
    for entry in hits:
        video = Video.from_vidscraper_video(entry, commit=False)
        if video.embed_code or video.file_url:
            yield video
def video_from_vidscraper_video(vidscraper_video, site_pk,
                                import_app_label=None, import_model=None,
                                import_pk=None, status=None, author_pks=None,
                                category_pks=None, clear_rejected=False,
                                using='default'):
    """Import one scraped video into the local ``Video`` table.

    Looks up the in-progress source import (``import_app_label`` /
    ``import_model`` / ``import_pk``), loads the scraped data, applies a
    series of skip checks (no title, no usable file/embed, duplicate guid
    or link on this site), resolves categories and authors, and finally
    creates the ``Video`` row. All failures short-circuit through
    ``source_import.handle_error(...)``. ``using`` selects the database
    alias for every query.
    """
    import_class = get_model(import_app_label, import_model)
    try:
        source_import = import_class.objects.using(using).get(
            pk=import_pk, status=import_class.STARTED)
    except import_class.DoesNotExist:
        # The import record isn't (yet) visible in this DB; retry the task.
        # NOTE(review): presumably this is a Celery task (see .retry() and
        # the trailing "Celery log" comment) — confirm the decorator.
        logging.warn('Retrying %r: expected %s instance (pk=%r) missing.',
                     vidscraper_video.url, import_class.__name__, import_pk)
        video_from_vidscraper_video.retry()
    try:
        try:
            vidscraper_video.load()
        except Exception:
            source_import.handle_error(
                ('Skipped %r: Could not load video data.'
                 % vidscraper_video.url),
                using=using, is_skip=True, with_exception=True)
            return
        # A title is the minimum signal that scraping produced real data.
        if not vidscraper_video.title:
            source_import.handle_error(
                ('Skipped %r: Failed to scrape basic data.'
                 % vidscraper_video.url),
                is_skip=True, using=using)
            return
        # Need either a stable (non-expiring) file URL or an embed code.
        if ((vidscraper_video.file_url_expires or
             not vidscraper_video.file_url)
                and not vidscraper_video.embed_code):
            source_import.handle_error(
                ('Skipping %r: no file or embed code.'
                 % vidscraper_video.url),
                is_skip=True, using=using)
            return
        # Duplicate detection is scoped to the target site.
        site_videos = Video.objects.using(using).filter(site=site_pk)
        if vidscraper_video.guid:
            guid_videos = site_videos.filter(guid=vidscraper_video.guid)
            if clear_rejected:
                # First run after a clear: rejected dupes are purged so the
                # video can be re-imported.
                guid_videos.filter(status=Video.REJECTED).delete()
            if guid_videos.exists():
                source_import.handle_error(
                    ('Skipping %r: duplicate guid.'
                     % vidscraper_video.url),
                    is_skip=True, using=using)
                return
        if vidscraper_video.link:
            videos_with_link = site_videos.filter(
                website_url=vidscraper_video.link)
            if clear_rejected:
                videos_with_link.filter(status=Video.REJECTED).delete()
            if videos_with_link.exists():
                source_import.handle_error(
                    ('Skipping %r: duplicate link.'
                     % vidscraper_video.url),
                    is_skip=True, using=using)
                return
        categories = Category.objects.using(using).filter(pk__in=category_pks)
        if author_pks:
            authors = User.objects.using(using).filter(pk__in=author_pks)
        else:
            if vidscraper_video.user:
                # Derive a User from the scraped username; usernames and
                # name parts are truncated to 30 chars (User field limits).
                name = vidscraper_video.user
                if ' ' in name:
                    first, last = name.split(' ', 1)
                else:
                    first, last = name, ''
                author, created = User.objects.db_manager(using).get_or_create(
                    username=name[:30],
                    defaults={'first_name': first[:30],
                              'last_name': last[:30]})
                if created:
                    # Auto-created authors cannot log in; give them a profile.
                    author.set_unusable_password()
                    author.save()
                    utils.get_profile_model().objects.db_manager(using).create(
                        user=author,
                        website=vidscraper_video.user_url or '')
                authors = [author]
            else:
                authors = []
        # Since we check above whether the vidscraper_video is valid, we don't
        # catch InvalidVideo here, since it would be unexpected. We don't
        # update the index because this is expected to be run as part of the
        # import process; the video will be indexed in bulk after the feed
        # import is complete.
        video = Video.from_vidscraper_video(vidscraper_video, status=status,
                                            using=using,
                                            source_import=source_import,
                                            authors=authors,
                                            categories=categories,
                                            site_pk=site_pk,
                                            update_index=False)
        logging.debug('Made video %i: %r', video.pk, video.name)
        if video.thumbnail_url:
            # Thumbnail download is deferred to an async task.
            video_save_thumbnail.delay(video.pk, using=using)
    except Exception:
        source_import.handle_error(
            ('Unknown error during import of %r' % vidscraper_video.url),
            is_skip=True, using=using, with_exception=True)
        raise # so it shows up in the Celery log
def get_object(self):
    """Return a Video built from the scraped data, or a blank Video if none."""
    scraped = self.video
    if scraped is None:
        return Video()
    return Video.from_vidscraper_video(scraped, commit=False)
author.set_unusable_password() author.save() utils.get_profile_model().objects.db_manager(using).create( user=author, website=vidscraper_video.user_url or "" ) authors = [author] else: authors = [] # Since we check above whether the vidscraper_video is valid, we don't # catch InvalidVideo here, since it would be unexpected. video = Video.from_vidscraper_video( vidscraper_video, status=status, using=using, source_import=source_import, authors=authors, categories=categories, site_pk=site_pk, ) logging.debug("Made video %i: %r", video.pk, video.name) if video.thumbnail_url: video_save_thumbnail.delay(video.pk, using=using) except Exception: source_import.handle_error( ("Unknown error during import of %r" % vidscraper_video.url), is_skip=True, using=using, with_exception=True ) raise # so it shows up in the Celery log @task(ignore_result=True)
def get_object(self):
    """Build a Video from ``self.video`` when present, else an empty Video."""
    return (Video.from_vidscraper_video(self.video, commit=False)
            if self.video is not None
            else Video())
def video_from_vidscraper_video(vidscraper_video, site_pk,
                                import_app_label=None, import_model=None,
                                import_pk=None, status=None, author_pks=None,
                                category_pks=None, clear_rejected=False,
                                using='default'):
    """Create a local ``Video`` from a single scraped video.

    Fetches the owning source import (must be in ``STARTED`` state, else the
    task retries), loads the scraped record, skips it on missing title,
    missing file/embed data, or duplicate guid/link within the site, then
    resolves categories/authors and saves the Video. Every skip or failure
    is reported via ``source_import.handle_error(...)``; ``using`` picks the
    database alias for all ORM calls.
    """
    import_class = get_model(import_app_label, import_model)
    try:
        source_import = import_class.objects.using(using).get(
            pk=import_pk, status=import_class.STARTED)
    except import_class.DoesNotExist:
        # Import row not visible yet on this alias — retry the task later.
        # NOTE(review): .retry() and the Celery-log comment below suggest a
        # Celery task decorator outside this view — confirm.
        logging.warn('Retrying %r: expected %s instance (pk=%r) missing.',
                     vidscraper_video.url, import_class.__name__, import_pk)
        video_from_vidscraper_video.retry()
    try:
        try:
            vidscraper_video.load()
        except Exception:
            source_import.handle_error(
                ('Skipped %r: Could not load video data.'
                 % vidscraper_video.url),
                using=using, is_skip=True, with_exception=True)
            return
        # No title means scraping produced nothing usable.
        if not vidscraper_video.title:
            source_import.handle_error(
                ('Skipped %r: Failed to scrape basic data.'
                 % vidscraper_video.url),
                is_skip=True, using=using)
            return
        # Require a non-expiring file URL or an embed code.
        if ((vidscraper_video.file_url_expires or
             not vidscraper_video.file_url)
                and not vidscraper_video.embed_code):
            source_import.handle_error(('Skipping %r: no file or embed code.'
                                        % vidscraper_video.url),
                                       is_skip=True, using=using)
            return
        # Duplicate checks are limited to videos on the same site.
        site_videos = Video.objects.using(using).filter(site=site_pk)
        if vidscraper_video.guid:
            guid_videos = site_videos.filter(guid=vidscraper_video.guid)
            if clear_rejected:
                # Purge rejected duplicates so the video can be re-imported.
                guid_videos.filter(status=Video.REJECTED).delete()
            if guid_videos.exists():
                source_import.handle_error(('Skipping %r: duplicate guid.'
                                            % vidscraper_video.url),
                                           is_skip=True, using=using)
                return
        if vidscraper_video.link:
            videos_with_link = site_videos.filter(
                website_url=vidscraper_video.link)
            if clear_rejected:
                videos_with_link.filter(status=Video.REJECTED).delete()
            if videos_with_link.exists():
                source_import.handle_error(('Skipping %r: duplicate link.'
                                            % vidscraper_video.url),
                                           is_skip=True, using=using)
                return
        categories = Category.objects.using(using).filter(pk__in=category_pks)
        if author_pks:
            authors = User.objects.using(using).filter(pk__in=author_pks)
        else:
            if vidscraper_video.user:
                # Fall back to a User derived from the scraped username;
                # values are truncated to the 30-char User field limits.
                name = vidscraper_video.user
                if ' ' in name:
                    first, last = name.split(' ', 1)
                else:
                    first, last = name, ''
                author, created = User.objects.db_manager(using).get_or_create(
                    username=name[:30],
                    defaults={'first_name': first[:30],
                              'last_name': last[:30]})
                if created:
                    # Auto-created users get no password and a bare profile.
                    author.set_unusable_password()
                    author.save()
                    utils.get_profile_model().objects.db_manager(using).create(
                        user=author, website=vidscraper_video.user_url or '')
                authors = [author]
            else:
                authors = []
        # Since we check above whether the vidscraper_video is valid, we don't
        # catch InvalidVideo here, since it would be unexpected. We don't
        # update the index because this is expected to be run as part of the
        # import process; the video will be indexed in bulk after the feed
        # import is complete.
        video = Video.from_vidscraper_video(vidscraper_video, status=status,
                                            using=using,
                                            source_import=source_import,
                                            authors=authors,
                                            categories=categories,
                                            site_pk=site_pk,
                                            update_index=False)
        logging.debug('Made video %i: %r', video.pk, video.name)
        if video.thumbnail_url:
            # Thumbnail fetch happens asynchronously.
            video_save_thumbnail.delay(video.pk, using=using)
    except Exception:
        source_import.handle_error(('Unknown error during import of %r'
                                    % vidscraper_video.url),
                                   is_skip=True, using=using,
                                   with_exception=True)
        raise # so it shows up in the Celery log