Example #1
def on_dataset_delete(sender, **kwargs):
    """
    When a Dataset is deleted, purge its data and metadata from Solr.
    """
    dataset = kwargs["instance"]
    PurgeDataTask.apply_async(args=[dataset.slug])
    solr.delete(settings.SOLR_DATASETS_CORE, "slug:%s" % dataset.slug)
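on_dataset_delete in Example #1 is written as a Django signal receiver: it pulls the deleted instance out of kwargs, queues the data purge asynchronously and removes the dataset's metadata document from Solr synchronously. A minimal sketch of how such a receiver might be wired up, assuming Django's standard post_delete signal and the panda.models.Dataset model; the wiring itself is an illustration, not PANDA's actual registration code:

from django.db.models.signals import post_delete

from panda.models import Dataset

# Purge Solr whenever a Dataset row is deleted (illustrative wiring only;
# the real project may register this receiver elsewhere).
post_delete.connect(on_dataset_delete, sender=Dataset)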
Example #2
    def run(self, dataset_slug):
        log = logging.getLogger('panda.tasks.purge.data')
        log.info('Beginning purge, dataset_slug: %s' % dataset_slug)

        solr.delete(settings.SOLR_DATA_CORE, 'dataset_slug:%s' % dataset_slug)

        log.info('Finished purge, dataset_slug: %s' % dataset_slug)
Example #3
    def after_return(self, status, retval, task_id, args, kwargs, einfo):
        """
        Save final status, results, etc.
        """
        from panda.models import Dataset

        log = logging.getLogger(self.name)

        try:
            dataset = Dataset.objects.get(slug=args[0])
        except Dataset.DoesNotExist:
            log.warning(
                'Can not send reindexing notifications due to Dataset being deleted, dataset_slug: %s'
                % args[0])

            return

        try:
            try:
                self.send_notifications(dataset, retval, einfo)
            finally:
                # If reindex failed, clear any data that might be staged
                if dataset.current_task.status == 'FAILURE':
                    solr.delete(settings.SOLR_DATA_CORE,
                                'dataset_slug:%s' % args[0],
                                commit=True)
        finally:
            dataset.unlock()
Example #4
    def test_delete(self):
        upload = utils.get_test_data_upload(self.user, self.dataset)
        upload_id = upload.id
        path = upload.get_path()

        self.assertEqual(os.path.isfile(path), True)

        solr.delete(settings.SOLR_DATA_CORE, '*:*')
        self.dataset.import_data(self.user, upload)
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 1)

        upload = DataUpload.objects.get(id=upload_id)
        
        dataset = Dataset.objects.get(id=self.dataset.id)
        self.assertEqual(dataset.initial_upload, upload)
        self.assertEqual(dataset.row_count, 4)

        upload.delete()

        # Ensure dataset still exists
        dataset = Dataset.objects.get(id=self.dataset.id)
        self.assertEqual(dataset.initial_upload, None)
        self.assertEqual(dataset.row_count, 0)

        self.assertEqual(os.path.exists(path), False)

        with self.assertRaises(DataUpload.DoesNotExist):
            DataUpload.objects.get(id=upload_id)
        
        self.assertEqual(solr.query(settings.SOLR_DATA_CORE, 'Christopher')['response']['numFound'], 0)
Example #5
    def test_delete(self):
        upload = utils.get_test_data_upload(self.user, self.dataset)
        upload_id = upload.id
        path = upload.get_path()

        self.assertEqual(os.path.isfile(path), True)

        solr.delete(settings.SOLR_DATA_CORE, '*:*')
        self.dataset.import_data(self.user, upload)
        self.assertEqual(
            solr.query(settings.SOLR_DATA_CORE,
                       'Christopher')['response']['numFound'], 1)

        upload = DataUpload.objects.get(id=upload_id)

        dataset = Dataset.objects.get(id=self.dataset.id)
        self.assertEqual(dataset.initial_upload, upload)
        self.assertEqual(dataset.row_count, 4)

        upload.delete()

        # Ensure dataset still exists
        dataset = Dataset.objects.get(id=self.dataset.id)
        self.assertEqual(dataset.initial_upload, None)
        self.assertEqual(dataset.row_count, 0)

        self.assertEqual(os.path.exists(path), False)

        with self.assertRaises(DataUpload.DoesNotExist):
            DataUpload.objects.get(id=upload_id)

        self.assertEqual(
            solr.query(settings.SOLR_DATA_CORE,
                       'Christopher')['response']['numFound'], 0)
Example #6
    def delete_all_rows(self, user,):
        """
        Delete all rows in this dataset.
        """
        solr.delete(settings.SOLR_DATA_CORE, 'dataset_slug:%s' % self.slug, commit=True)

        old_row_count = self.row_count
        self.row_count = 0
        self.last_modified = datetime.utcnow()
        self.last_modification = 'All %i rows deleted' % old_row_count
        self.save()
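Every snippet on this page calls solr.delete(core, query, commit=...). PANDA's own helper is not shown here, but a delete-by-query wrapper of that shape could be sketched with Solr's JSON update API; the function name, the requests dependency and the default URL below are illustrative assumptions, not PANDA's client code:

import requests

def delete_by_query(core, query, commit=False,
                    solr_url='http://localhost:8983/solr'):
    """
    Send a delete-by-query command to the given Solr core (illustrative sketch).
    """
    response = requests.post(
        '%s/%s/update' % (solr_url, core),
        params={'commit': 'true'} if commit else None,
        json={'delete': {'query': query}},
    )
    response.raise_for_status()

    return response.json()

Called as delete_by_query('data_test', 'dataset_slug:my-dataset', commit=True), this maps onto the 'dataset_slug:%s' deletions used throughout the examples above.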
Example #7
    def delete_row(self, user, external_id):
        """
        Delete a row in this dataset.
        """
        solr.delete(settings.SOLR_DATA_CORE, 'dataset_slug:%s AND external_id:%s' % (self.slug, external_id), commit=True)

        self.row_count = self._count_rows()
        self.last_modified = datetime.utcnow()
        self.last_modified_by = user
        self.last_modification = '1 row deleted'
        self.save()
Example #8
    def delete(self, *args, **kwargs):
        """
        Cancel any in progress task.
        """
        # Cancel import if necessary 
        if self.current_task:
            self.current_task.request_abort()

        # Cleanup data in Solr
        PurgeDataTask.apply_async(args=[self.slug])
        solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % self.slug)

        super(Dataset, self).delete(*args, **kwargs)
Example #9
    def delete_all_rows(self, user):
        """
        Delete all rows in this dataset.
        """
        self.lock()

        try:
            solr.delete(settings.SOLR_DATA_CORE, "dataset_slug:%s" % self.slug, commit=True)

            old_row_count = self.row_count
            self.row_count = 0
            self.last_modified = datetime.utcnow()
            self.last_modification = "All %i rows deleted" % old_row_count
            self.save()
        finally:
            self.unlock()
Example #10
    def delete_row(self, user, external_id):
        """
        Delete a row in this dataset.
        """
        self.lock()

        try:
            solr.delete(settings.SOLR_DATA_CORE, 'dataset_slug:%s AND external_id:%s' % (self.slug, external_id), commit=True)
        
            self.row_count = self._count_rows()
            self.last_modified = now()
            self.last_modified_by = user
            self.last_modification = _('1 row deleted')
            self.save()
        finally:
            self.unlock()
Example #11
    def delete_all_rows(self, user,):
        """
        Delete all rows in this dataset.
        """
        self.lock()

        try:
            solr.delete(settings.SOLR_DATA_CORE, 'dataset_slug:%s' % self.slug, commit=True)

            old_row_count = self.row_count
            self.row_count = 0
            self.last_modified = now()
            self.last_modification = _('All %i rows deleted') % (old_row_count or 0)
            self.save()
        finally:
            self.unlock()
Example #12
    def after_return(self, status, retval, task_id, args, kwargs, einfo):
        """
        Save final status, results, etc.
        """
        from panda.models import Dataset, Notification

        dataset = Dataset.objects.get(slug=args[0])
        task_status = dataset.current_task 

        if einfo:
            self.task_exception(
                task_status,
                'Import failed',
                u'\n'.join([einfo.traceback, unicode(retval)])
            )
            
            email_subject = 'Import failed: %s' % dataset.name
            email_message = 'Import failed: %s:\n\nhttp://%s/#dataset/%s' % (dataset.name, config_value('DOMAIN', 'SITE_DOMAIN'), dataset.slug)
            notification_message = 'Import failed: <strong>%s</strong>' % dataset.name
            notification_type = 'Error'
        elif self.is_aborted():
            email_subject = 'Import aborted: %s' % dataset.name
            email_message = 'Import aborted: %s:\n\nhttp://%s/#dataset/%s' % (dataset.name, config_value('DOMAIN', 'SITE_DOMAIN'), dataset.slug)
            notification_message = 'Import aborted: <strong>%s</strong>' % dataset.name
            notification_type = 'Info'
        else:
            self.task_complete(task_status, 'Import complete')
            
            email_subject = 'Import complete: %s' % dataset.name
            email_message = 'Import complete: %s:\n\nhttp://%s/#dataset/%s' % (dataset.name, config_value('DOMAIN', 'SITE_DOMAIN'), dataset.slug)
            notification_message = 'Import complete: <strong>%s</strong>' % dataset.name
            notification_type = 'Info'
        
        if task_status.creator:
            Notification.objects.create(
                recipient=task_status.creator,
                related_task=task_status,
                related_dataset=dataset,
                message=notification_message,
                type=notification_type
            )

            send_mail(email_subject, email_message, [task_status.creator.username])

        # If import failed, clear any data that might be staged
        if task_status.status == 'FAILURE':
            solr.delete(settings.SOLR_DATA_CORE, 'dataset_slug:%s' % args[0], commit=True)
Example #13
    def test_change_user_reindex(self):
        solr.delete(settings.SOLR_DATASETS_CORE, '*:*') 

        self.user.first_name = 'bazbarfoo'
        self.user.save()

        dataset = utils.get_test_dataset(self.user)
        upload = utils.get_test_data_upload(self.user, dataset)
        
        self.assertEqual(solr.query(settings.SOLR_DATASETS_CORE, dataset.creator.first_name)['response']['numFound'], 1)
        old_name = dataset.creator.first_name

        dataset.creator.first_name = 'foobarbaz'
        dataset.creator.save()

        self.assertEqual(solr.query(settings.SOLR_DATASETS_CORE, old_name)['response']['numFound'], 0)
        self.assertEqual(solr.query(settings.SOLR_DATASETS_CORE, dataset.creator.first_name)['response']['numFound'], 1)
Example #14
    def after_return(self, status, retval, task_id, args, kwargs, einfo):
        """
        Save final status, results, etc.
        """
        from panda.models import Dataset

        dataset = Dataset.objects.get(slug=args[0])

        try:
            try:
                self.send_notifications(dataset, retval, einfo)
            finally:
                # If import failed, clear any data that might be staged
                if dataset.current_task.status == "FAILURE":
                    solr.delete(settings.SOLR_DATA_CORE, "dataset_slug:%s" % args[0], commit=True)
        finally:
            dataset.unlock()
Example #15
    def test_change_user_reindex(self):
        solr.delete(settings.SOLR_DATASETS_CORE, '*:*') 

        self.user.first_name = 'bazbarfoo'
        self.user.save()

        dataset = utils.get_test_dataset(self.user)
        upload = utils.get_test_data_upload(self.user, dataset)
        
        self.assertEqual(solr.query(settings.SOLR_DATASETS_CORE, dataset.creator.first_name)['response']['numFound'], 1)
        old_name = dataset.creator.first_name

        dataset.creator.first_name = 'foobarbaz'
        dataset.creator.save()

        self.assertEqual(solr.query(settings.SOLR_DATASETS_CORE, old_name)['response']['numFound'], 0)
        self.assertEqual(solr.query(settings.SOLR_DATASETS_CORE, dataset.creator.first_name)['response']['numFound'], 1)
Example #16
    def delete(self, *args, **kwargs):
        """
        Cancel any in progress task.
        """
        # Cancel import if necessary 
        if self.current_task:
            self.current_task.request_abort()

        # Manually delete related uploads so their delete method is called
        for upload in chain(self.data_uploads.all(), self.related_uploads.all()):
            upload.delete(skip_purge=True)

        # Cleanup data in Solr
        PurgeDataTask.apply_async(args=[self.slug])
        solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % self.slug)

        super(Dataset, self).delete(*args, **kwargs)
Example #17
    def delete_row(self, user, external_id):
        """
        Delete a row in this dataset.
        """
        self.lock()

        try:
            solr.delete(settings.SOLR_DATA_CORE,
                        'dataset_slug:%s AND external_id:%s' %
                        (self.slug, external_id),
                        commit=True)

            self.row_count = self._count_rows()
            self.last_modified = now()
            self.last_modified_by = user
            self.last_modification = _('1 row deleted')
            self.save()
        finally:
            self.unlock()
Example #18
    def delete(self, *args, **kwargs):
        """
        Cancel any in progress task.
        """
        # Cancel import if necessary
        if self.current_task:
            self.current_task.request_abort()

        # Manually delete related uploads so their delete method is called
        for upload in self.data_uploads.all():
            upload.delete(skip_purge=True, force=True)

        for upload in self.related_uploads.all():
            upload.delete()

        # Cleanup data in Solr
        PurgeDataTask.apply_async(args=[self.slug])
        solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % self.slug)

        super(Dataset, self).delete(*args, **kwargs)
Example #19
    def delete_all_rows(
        self,
        user,
    ):
        """
        Delete all rows in this dataset.
        """
        self.lock()

        try:
            solr.delete(settings.SOLR_DATA_CORE,
                        'dataset_slug:%s' % self.slug,
                        commit=True)

            old_row_count = self.row_count
            self.row_count = 0
            self.last_modified = now()
            self.last_modification = 'All %i rows deleted' % (old_row_count or 0)
            self.save()
        finally:
            self.unlock()
Example #20
    def run(self, dataset_slug, data_upload_id=None):
        from panda.models import Dataset

        log = logging.getLogger(self.name)
        log.info('Beginning purge, dataset_slug: %s' % dataset_slug)

        if data_upload_id:
            q = 'data_upload_id:%i' % data_upload_id
        else:
            q = 'dataset_slug:%s' % dataset_slug

        solr.delete(settings.SOLR_DATA_CORE, q)

        try:
            # If the dataset hasn't been deleted, update its row count
            dataset = Dataset.objects.get(slug=dataset_slug)
            dataset.row_count = dataset._count_rows()
            dataset.save()
        except Dataset.DoesNotExist:
            pass

        log.info('Finished purge, dataset_slug: %s' % dataset_slug)
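After the purge, the task refreshes dataset.row_count via _count_rows(). One way to get such a count is to read numFound from a Solr query on the dataset's slug, reusing the solr.query call shape visible in the test snippets earlier on this page; whether PANDA's _count_rows does exactly this is an assumption:

def count_rows(dataset_slug):
    # Illustrative sketch: count matching documents by reading Solr's numFound.
    response = solr.query(settings.SOLR_DATA_CORE, 'dataset_slug:%s' % dataset_slug)

    return response['response']['numFound']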
Example #21
    def run(self, dataset_slug, data_upload_id=None):
        from panda.models import Dataset

        log = logging.getLogger(self.name)
        log.info('Beginning purge, dataset_slug: %s' % dataset_slug)

        if data_upload_id:
            q = 'data_upload_id:%i' % data_upload_id
        else:
            q = 'dataset_slug:%s' % dataset_slug

        solr.delete(settings.SOLR_DATA_CORE, q)

        try:
            # If the dataset hasn't been deleted, update its row count
            dataset = Dataset.objects.get(slug=dataset_slug)
            dataset.row_count = dataset._count_rows()
            dataset.save()
        except Dataset.DoesNotExist:
            pass

        log.info('Finished purge, dataset_slug: %s' % dataset_slug)
Example #22
    def after_return(self, status, retval, task_id, args, kwargs, einfo):
        """
        Save final status, results, etc.
        """
        from panda.models import Dataset

        log = logging.getLogger(self.name)

        try:
            dataset = Dataset.objects.get(slug=args[0])
        except Dataset.DoesNotExist:
            log.warning('Can not send reindexing notifications due to Dataset being deleted, dataset_slug: %s' % args[0])

            return

        try:
            try:
                self.send_notifications(dataset, retval, einfo)
            finally:
                # If reindex failed, clear any data that might be staged
                if dataset.current_task.status == 'FAILURE':
                    solr.delete(settings.SOLR_DATA_CORE, 'dataset_slug:%s' % args[0], commit=True)
        finally:
            dataset.unlock()
Example #23
File: data.py Project: Rawadx/panda
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        try:
            query = '(%s)' % request.GET['q']
        except KeyError:
            query = ''

        category = request.GET.get('category', '')
        since = request.GET.get('since', None)
        limit = int(request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get('offset', 0))
        group_limit = int(request.GET.get('group_limit', settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get('group_offset', 0))
        export = bool(request.GET.get('export', False))

        solr_query_bits = [query]

        if category:
            if category != 'uncategorized':
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list('slug', flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(categories=None).values_list('slug', flat=True) 

            solr_query_bits.append('dataset_slug:(%s)' % ' '.join(dataset_slugs))

        if since:
            solr_query_bits.append('last_modified:[' + since + 'Z TO *]')

        # Because users may have authenticated via headers the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        if export:
            task_type = ExportSearchTask

            task = TaskStatus.objects.create(
                task_name=task_type.name,
                task_description=_('Export search results for "%s".') % query,
                creator=user
            )

            task_type.apply_async(
                args=[query, task.id],
                kwargs={},
                task_id=task.id
            )
        else:
            response = solr.query_grouped(
                settings.SOLR_DATA_CORE,
                ' AND '.join(solr_query_bits),
                'dataset_slug',
                offset=offset,
                limit=limit,
                group_limit=group_limit,
                group_offset=group_offset
            )
            groups = response['grouped']['dataset_slug']['groups']

            page = PandaPaginator(
                request.GET,
                groups,
                resource_uri=request.path_info,
                count=response['grouped']['dataset_slug']['ngroups']
            ).page()

            datasets = []

            for group in groups:
                dataset_slug = group['groupValue']
                results = group['doclist']
                
                try:
                    dataset = Dataset.objects.get(slug=dataset_slug)
                # In the event that stale data exists in Solr, skip this dataset,
                # request the invalid data be purged and return the other results.
                # Pagination may be wrong, but this is the most functional solution. (#793)
                except Dataset.DoesNotExist:
                    PurgeDataTask.apply_async(args=[dataset_slug])
                    solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % dataset_slug)

                    page['meta']['total_count'] -= 1

                    continue
                
                dataset_resource = DatasetResource()
                dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
                dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
                dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

                objects = [SolrObject(obj) for obj in results['docs']]
                
                dataset_search_url = reverse('api_dataset_data_list', kwargs={ 'api_name': self._meta.api_name, 'dataset_resource_name': 'dataset', 'resource_name': 'data', 'dataset_slug': dataset.slug })

                data_page = PandaPaginator(
                    { 'limit': str(group_limit), 'offset': str(group_offset), 'q': query },
                    objects,
                    resource_uri=dataset_search_url,
                    count=results['numFound']
                ).page()

                dataset_bundle.data.update(data_page)
                dataset_bundle.data['objects'] = []

                for obj in objects:
                    data_bundle = self.build_bundle(obj=obj, request=request)
                    data_bundle = self.full_dehydrate(data_bundle)
                    dataset_bundle.data['objects'].append(data_bundle)

                datasets.append(dataset_bundle.data)

            page['objects'] = datasets
            
            # Log query
            SearchLog.objects.create(user=user, dataset=None, query=query)

        self.log_throttled_access(request)

        if export:
            return self.create_response(request, _('Export queued.'))
        else:
            return self.create_response(request, page)
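search_all_data leans on solr.query_grouped to group matching rows by dataset_slug and then reads groups, groupValue, doclist and ngroups out of the response. Those keys correspond to Solr's standard result-grouping parameters; a hedged sketch of the kind of request such a helper might issue (the parameter names are plain Solr, but the helper's internals and this function are assumptions):

import requests

def query_grouped_sketch(core, q, group_field, offset=0, limit=10,
                         group_limit=10, group_offset=0,
                         solr_url='http://localhost:8983/solr'):
    """
    Run a grouped Solr select and return the parsed JSON (illustrative only).
    """
    params = {
        'q': q,
        'start': offset,
        'rows': limit,
        'group': 'true',
        'group.field': group_field,    # e.g. 'dataset_slug'
        'group.limit': group_limit,    # rows returned per group
        'group.offset': group_offset,
        'group.ngroups': 'true',       # enables the 'ngroups' count used above
        'wt': 'json',
    }

    response = requests.get('%s/%s/select' % (solr_url, core), params=params)
    response.raise_for_status()

    # Callers would then read response.json()['grouped'][group_field]['groups'].
    return response.json()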
Example #24
def setup_test_solr():
    settings.SOLR_DATA_CORE = 'data_test'
    settings.SOLR_DATASETS_CORE = 'datasets_test'
    config_get('PERF', 'TASK_THROTTLE').update(0.0) 
    solr.delete(settings.SOLR_DATA_CORE, '*:*')
    solr.delete(settings.SOLR_DATASETS_CORE, '*:*')
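setup_test_solr points the settings at the *_test cores and wipes them, so each test starts from an empty index. A minimal sketch of calling it from a test case's setUp; the test class name, the panda.tests.utils module path and the get_test_user helper are assumptions that mirror the helpers used in the test snippets above:

from django.test import TransactionTestCase

from panda.tests import utils

class TestDatasetSolr(TransactionTestCase):
    def setUp(self):
        # Switch to the test cores and clear any leftover documents.
        utils.setup_test_solr()

        # These helpers mirror the ones used in the examples above;
        # get_test_user is assumed to exist alongside them.
        self.user = utils.get_test_user()
        self.dataset = utils.get_test_dataset(self.user)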
Example #25
File: utils.py Project: eads/panda
def setup_test_solr():
    settings.SOLR_DATA_CORE = 'data_test'
    settings.SOLR_DATASETS_CORE = 'datasets_test'
    config_get('PERF', 'TASK_THROTTLE').update(0.0) 
    solr.delete(settings.SOLR_DATA_CORE, '*:*')
    solr.delete(settings.SOLR_DATASETS_CORE, '*:*')
Example #26
File: utils.py Project: eob/panda
def setup_test_solr():
    settings.SOLR_DATA_CORE = 'data_test'
    settings.SOLR_DATASETS_CORE = 'datasets_test'
    solr.delete(settings.SOLR_DATA_CORE, '*:*')
    solr.delete(settings.SOLR_DATASETS_CORE, '*:*')
Example #27
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get('q', '')
        category = request.GET.get('category', '')
        since = request.GET.get('since', None)
        limit = int(
            request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get('offset', 0))
        group_limit = int(
            request.GET.get('group_limit',
                            settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get('group_offset', 0))
        export = bool(request.GET.get('export', False))

        if category:
            if category != 'uncategorized':
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list('slug',
                                                              flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(
                    categories=None).values_list('slug', flat=True)

            query += ' dataset_slug:(%s)' % ' '.join(dataset_slugs)

        if since:
            query = 'last_modified:[' + since + 'Z TO *] AND (%s)' % query

        # Because users may have authenticated via headers the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        if export:
            task_type = ExportSearchTask

            task = TaskStatus.objects.create(
                task_name=task_type.name,
                task_description='Export search results for "%s".' % query,
                creator=user)

            task_type.apply_async(args=[query, task.id],
                                  kwargs={},
                                  task_id=task.id)
        else:
            response = solr.query_grouped(settings.SOLR_DATA_CORE,
                                          query,
                                          'dataset_slug',
                                          offset=offset,
                                          limit=limit,
                                          group_limit=group_limit,
                                          group_offset=group_offset)
            groups = response['grouped']['dataset_slug']['groups']

            page = PandaPaginator(
                request.GET,
                groups,
                resource_uri=request.path_info,
                count=response['grouped']['dataset_slug']['ngroups']).page()

            datasets = []

            for group in groups:
                dataset_slug = group['groupValue']
                results = group['doclist']

                try:
                    dataset = Dataset.objects.get(slug=dataset_slug)
                # In the event that stale data exists in Solr, skip this dataset,
                # request the invalid data be purged and return the other results.
                # Pagination may be wrong, but this is the most functional solution. (#793)
                except Dataset.DoesNotExist:
                    PurgeDataTask.apply_async(args=[dataset_slug])
                    solr.delete(settings.SOLR_DATASETS_CORE,
                                'slug:%s' % dataset_slug)

                    page['meta']['total_count'] -= 1

                    continue

                dataset_resource = DatasetResource()
                dataset_bundle = dataset_resource.build_bundle(obj=dataset,
                                                               request=request)
                dataset_bundle = dataset_resource.full_dehydrate(
                    dataset_bundle)
                dataset_bundle = dataset_resource.simplify_bundle(
                    dataset_bundle)

                objects = [SolrObject(obj) for obj in results['docs']]

                dataset_search_url = reverse('api_dataset_data_list',
                                             kwargs={
                                                 'api_name':
                                                 self._meta.api_name,
                                                 'dataset_resource_name':
                                                 'dataset',
                                                 'resource_name': 'data',
                                                 'dataset_slug': dataset.slug
                                             })

                data_page = PandaPaginator(
                    {
                        'limit': str(group_limit),
                        'offset': str(group_offset),
                        'q': query
                    },
                    objects,
                    resource_uri=dataset_search_url,
                    count=results['numFound']).page()

                dataset_bundle.data.update(data_page)
                dataset_bundle.data['objects'] = []

                for obj in objects:
                    data_bundle = self.build_bundle(obj=obj, request=request)
                    data_bundle = self.full_dehydrate(data_bundle)
                    dataset_bundle.data['objects'].append(data_bundle)

                datasets.append(dataset_bundle.data)

            page['objects'] = datasets

            # Log query
            SearchLog.objects.create(user=user, dataset=None, query=query)

        self.log_throttled_access(request)

        if export:
            return self.create_response(request, 'Export queued.')
        else:
            return self.create_response(request, page)
Example #28
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.
        """
        self.method_check(request, allowed=["get"])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get("q", "")
        category = request.GET.get("category", "")
        since = request.GET.get("since", None)
        limit = int(request.GET.get("limit", settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get("offset", 0))
        group_limit = int(request.GET.get("group_limit", settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get("group_offset", 0))
        export = bool(request.GET.get("export", False))

        if category:
            if category != "uncategorized":
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list("slug", flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(categories=None).values_list("slug", flat=True)

            query += " dataset_slug:(%s)" % " ".join(dataset_slugs)

        if since:
            query = "last_modified:[" + since + "Z TO *] AND (%s)" % query

        # Because users may have authenticated via headers the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        if export:
            task_type = ExportSearchTask

            task = TaskStatus.objects.create(
                task_name=task_type.name, task_description='Export search results for "%s".' % query, creator=user
            )

            task_type.apply_async(args=[query, task.id], kwargs={}, task_id=task.id)
        else:
            response = solr.query_grouped(
                settings.SOLR_DATA_CORE,
                query,
                "dataset_slug",
                offset=offset,
                limit=limit,
                group_limit=group_limit,
                group_offset=group_offset,
            )
            groups = response["grouped"]["dataset_slug"]["groups"]

            page = PandaPaginator(
                request.GET,
                groups,
                resource_uri=request.path_info,
                count=response["grouped"]["dataset_slug"]["ngroups"],
            ).page()

            datasets = []

            for group in groups:
                dataset_slug = group["groupValue"]
                results = group["doclist"]

                try:
                    dataset = Dataset.objects.get(slug=dataset_slug)
                # In the event that stale data exists in Solr, skip this dataset,
                # request the invalid data be purged and return the other results.
                # Pagination may be wrong, but this is the most functional solution. (#793)
                except Dataset.DoesNotExist:
                    PurgeDataTask.apply_async(args=[dataset_slug])
                    solr.delete(settings.SOLR_DATASETS_CORE, "slug:%s" % dataset_slug)

                    page["meta"]["total_count"] -= 1

                    continue

                dataset_resource = DatasetResource()
                dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
                dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
                dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

                objects = [SolrObject(obj) for obj in results["docs"]]

                dataset_search_url = reverse(
                    "api_dataset_data_list",
                    kwargs={
                        "api_name": self._meta.api_name,
                        "dataset_resource_name": "dataset",
                        "resource_name": "data",
                        "dataset_slug": dataset.slug,
                    },
                )

                data_page = PandaPaginator(
                    {"limit": str(group_limit), "offset": str(group_offset), "q": query},
                    objects,
                    resource_uri=dataset_search_url,
                    count=results["numFound"],
                ).page()

                dataset_bundle.data.update(data_page)
                dataset_bundle.data["objects"] = []

                for obj in objects:
                    data_bundle = self.build_bundle(obj=obj, request=request)
                    data_bundle = self.full_dehydrate(data_bundle)
                    dataset_bundle.data["objects"].append(data_bundle)

                datasets.append(dataset_bundle.data)

            page["objects"] = datasets

            # Log query
            SearchLog.objects.create(user=user, dataset=None, query=query)

        self.log_throttled_access(request)

        if export:
            return self.create_response(request, "Export queued.")
        else:
            return self.create_response(request, page)