Example #1
def on_dataset_delete(sender, **kwargs):
    """
    When a Dataset is deleted, purge its data and metadata from Solr.
    """
    dataset = kwargs["instance"]
    PurgeDataTask.apply_async(args=[dataset.slug])
    solr.delete(settings.SOLR_DATASETS_CORE, "slug:%s" % dataset.slug)
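This reads as a Django signal receiver, but the excerpt does not show how it is registered. A minimal sketch of the wiring, assuming the standard post_delete signal and that Dataset is importable where the handler lives:

from django.db.models.signals import post_delete

# Hypothetical registration -- the sender and placement are assumptions,
# chosen to match the docstring ("When a Dataset is deleted...").
post_delete.connect(on_dataset_delete, sender=Dataset)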
Example #2
    def delete(self, *args, **kwargs):
        """
        Cancel any in-progress task, then purge this dataset's data from Solr.
        """
        # Cancel import if necessary 
        if self.current_task:
            self.current_task.request_abort()

        # Cleanup data in Solr
        PurgeDataTask.apply_async(args=[self.slug])
        solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % self.slug)

        super(Dataset, self).delete(*args, **kwargs)
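A hedged usage sketch (the slug is illustrative): calling delete() aborts any running import and queues the asynchronous Solr purge before the row itself is removed.

# Illustrative only -- assumes a dataset with this slug exists.
dataset = Dataset.objects.get(slug='example-dataset')
dataset.delete()  # aborts current_task, queues PurgeDataTask, clears Solr metadata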
Example #3
    def delete(self, *args, **kwargs):
        """
        Cancel any in-progress task, delete related uploads, then purge this
        dataset's data from Solr.
        """
        # Cancel import if necessary 
        if self.current_task:
            self.current_task.request_abort()

        # Manually delete related uploads so their delete() method is called
        # (chain here is itertools.chain)
        for upload in chain(self.data_uploads.all(), self.related_uploads.all()):
            upload.delete(skip_purge=True)

        # Cleanup data in Solr
        PurgeDataTask.apply_async(args=[self.slug])
        solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % self.slug)

        super(Dataset, self).delete(*args, **kwargs)
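Passing skip_purge=True here presumably avoids redundant work: the dataset-wide PurgeDataTask queued just below already covers every row the uploads imported, so the per-upload purge tasks (which the DataUpload.delete implementations in Examples #4, #6 and #7 skip when the flag is set) would duplicate it.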
Example #4
    def delete(self, *args, **kwargs):
        """
        Detach this upload from datasets that reference it as their initial
        upload, then purge its imported data from Solr.
        """
        skip_purge = kwargs.pop('skip_purge', False)

        # Update related datasets so deletes will not cascade
        if self.initial_upload_for.count():
            for dataset in self.initial_upload_for.all():
                dataset.initial_upload = None
                dataset.save()

        # Cleanup data in Solr
        if self.dataset and self.imported and not skip_purge:
            PurgeDataTask.apply_async(args=[self.dataset.slug, self.id])

        super(DataUpload, self).delete(*args, **kwargs)
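A hedged usage sketch (the id is hypothetical): unlike the dataset-level purge, the task here receives the upload's id alongside the dataset slug, so presumably only that upload's rows are purged.

# Illustrative only -- deletes a single imported upload and queues a purge
# scoped to it, leaving the rest of the dataset's rows in Solr.
upload = DataUpload.objects.get(id=42)
upload.delete()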
Example #5
    def delete(self, *args, **kwargs):
        """
        Cancel any in-progress task, delete related uploads, then purge this
        dataset's data from Solr.
        """
        # Cancel import if necessary
        if self.current_task:
            self.current_task.request_abort()

        # Manually delete related uploads so their delete method is called
        for upload in self.data_uploads.all():
            upload.delete(skip_purge=True, force=True)

        for upload in self.related_uploads.all():
            upload.delete()

        # Cleanup data in Solr
        PurgeDataTask.apply_async(args=[self.slug])
        solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % self.slug)

        super(Dataset, self).delete(*args, **kwargs)
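Note the asymmetry with Example #3: data uploads are deleted with force=True and skip_purge=True, while related uploads go through their ordinary delete() path, presumably because only DataUpload carries the deletable guard shown in Examples #6 and #7.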
Example #6
    def delete(self, *args, **kwargs):
        """
        Delete this upload and purge its imported data from Solr. Uploads
        that predate per-upload deletion are refused unless forced.
        """
        skip_purge = kwargs.pop('skip_purge', False)
        force = kwargs.pop('force', False)

        # Don't allow deletion of dated uploads unless forced
        if not self.deletable and not force:
            raise DataUploadNotDeletable('This data upload was created before deleting individual data uploads was supported. In order to delete it you must delete the entire dataset.')

        # Update related datasets so deletes will not cascade
        if self.initial_upload_for.count():
            for dataset in self.initial_upload_for.all():
                dataset.initial_upload = None
                dataset.save()

        # Cleanup data in Solr
        if self.dataset and self.imported and not skip_purge:
            PurgeDataTask.apply_async(args=[self.dataset.slug, self.id])

        super(DataUpload, self).delete(*args, **kwargs)
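A hedged sketch of a caller working around the guard (upload is hypothetical):

# Uploads that predate per-upload deletion raise DataUploadNotDeletable;
# force=True bypasses the guard, as Dataset.delete does in Example #5.
try:
    upload.delete()
except DataUploadNotDeletable:
    upload.delete(force=True)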
Example #7
    def delete(self, *args, **kwargs):
        """
        Delete this upload and purge its imported data from Solr. Uploads
        that predate per-upload deletion are refused unless forced.
        """
        skip_purge = kwargs.pop('skip_purge', False)
        force = kwargs.pop('force', False)

        # Don't allow deletion of dated uploads unless forced
        if not self.deletable and not force:
            raise DataUploadNotDeletable(
                'This data upload was created before deleting individual data uploads was supported. In order to delete it you must delete the entire dataset.'
            )

        # Update related datasets so deletes will not cascade
        if self.initial_upload_for.count():
            for dataset in self.initial_upload_for.all():
                dataset.initial_upload = None
                dataset.save()

        # Cleanup data in Solr
        if self.dataset and self.imported and not skip_purge:
            PurgeDataTask.apply_async(args=[self.dataset.slug, self.id])

        super(DataUpload, self).delete(*args, **kwargs)
Example #8
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.
        """
        self.method_check(request, allowed=["get"])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get("q", "")
        category = request.GET.get("category", "")
        since = request.GET.get("since", None)
        limit = int(request.GET.get("limit", settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get("offset", 0))
        group_limit = int(request.GET.get("group_limit", settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get("group_offset", 0))
        export = bool(request.GET.get("export", False))

        if category:
            if category != "uncategorized":
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list("slug", flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(categories=None).values_list("slug", flat=True)

            query += " dataset_slug:(%s)" % " ".join(dataset_slugs)

        if since:
            query = "last_modified:[" + since + "Z TO *] AND (%s)" % query

        # Because users may have authenticated via headers, the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        if export:
            task_type = ExportSearchTask

            task = TaskStatus.objects.create(
                task_name=task_type.name, task_description='Export search results for "%s".' % query, creator=user
            )

            task_type.apply_async(args=[query, task.id], kwargs={}, task_id=task.id)
        else:
            response = solr.query_grouped(
                settings.SOLR_DATA_CORE,
                query,
                "dataset_slug",
                offset=offset,
                limit=limit,
                group_limit=group_limit,
                group_offset=group_offset,
            )
            groups = response["grouped"]["dataset_slug"]["groups"]

            page = PandaPaginator(
                request.GET,
                groups,
                resource_uri=request.path_info,
                count=response["grouped"]["dataset_slug"]["ngroups"],
            ).page()

            datasets = []

            for group in groups:
                dataset_slug = group["groupValue"]
                results = group["doclist"]

                try:
                    dataset = Dataset.objects.get(slug=dataset_slug)
                # In the event that stale data exists in Solr, skip this dataset,
                # request the invalid data be purged and return the other results.
                # Pagination may be wrong, but this is the most functional solution. (#793)
                except Dataset.DoesNotExist:
                    PurgeDataTask.apply_async(args=[dataset_slug])
                    solr.delete(settings.SOLR_DATASETS_CORE, "slug:%s" % dataset_slug)

                    page["meta"]["total_count"] -= 1

                    continue

                dataset_resource = DatasetResource()
                dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
                dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
                dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

                objects = [SolrObject(obj) for obj in results["docs"]]

                dataset_search_url = reverse(
                    "api_dataset_data_list",
                    kwargs={
                        "api_name": self._meta.api_name,
                        "dataset_resource_name": "dataset",
                        "resource_name": "data",
                        "dataset_slug": dataset.slug,
                    },
                )

                data_page = PandaPaginator(
                    {"limit": str(group_limit), "offset": str(group_offset), "q": query},
                    objects,
                    resource_uri=dataset_search_url,
                    count=results["numFound"],
                ).page()

                dataset_bundle.data.update(data_page)
                dataset_bundle.data["objects"] = []

                for obj in objects:
                    data_bundle = self.build_bundle(obj=obj, request=request)
                    data_bundle = self.full_dehydrate(data_bundle)
                    dataset_bundle.data["objects"].append(data_bundle)

                datasets.append(dataset_bundle.data)

            page["objects"] = datasets

            # Log query
            SearchLog.objects.create(user=user, dataset=None, query=query)

        self.log_throttled_access(request)

        if export:
            return self.create_response(request, "Export queued.")
        else:
            return self.create_response(request, page)
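A hedged client-side sketch. The host, mount point and API version are assumptions (they depend on URL configuration not shown here), and the credentials that is_authenticated expects are omitted.

import requests  # third-party HTTP client, purely for illustration

# Parameter names mirror the request.GET keys read above.
response = requests.get(
    'http://panda.example.com/api/1.0/data/',
    params={'q': 'smith', 'category': 'crime', 'limit': 5, 'group_limit': 10},
)
page = response.json()  # {'meta': {...}, 'objects': [one group per dataset]}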
Example #9
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        query = request.GET.get('q', '')
        category = request.GET.get('category', '')
        since = request.GET.get('since', None)
        limit = int(
            request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get('offset', 0))
        group_limit = int(
            request.GET.get('group_limit',
                            settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get('group_offset', 0))
        export = bool(request.GET.get('export', False))

        if category:
            if category != 'uncategorized':
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list('slug',
                                                              flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(
                    categories=None).values_list('slug', flat=True)

            query += ' dataset_slug:(%s)' % ' '.join(dataset_slugs)

        if since:
            query = 'last_modified:[' + since + 'Z TO *] AND (%s)' % query

        # Because users may have authenticated via headers, the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        if export:
            task_type = ExportSearchTask

            task = TaskStatus.objects.create(
                task_name=task_type.name,
                task_description='Export search results for "%s".' % query,
                creator=user)

            task_type.apply_async(args=[query, task.id],
                                  kwargs={},
                                  task_id=task.id)
        else:
            response = solr.query_grouped(settings.SOLR_DATA_CORE,
                                          query,
                                          'dataset_slug',
                                          offset=offset,
                                          limit=limit,
                                          group_limit=group_limit,
                                          group_offset=group_offset)
            groups = response['grouped']['dataset_slug']['groups']

            page = PandaPaginator(
                request.GET,
                groups,
                resource_uri=request.path_info,
                count=response['grouped']['dataset_slug']['ngroups']).page()

            datasets = []

            for group in groups:
                dataset_slug = group['groupValue']
                results = group['doclist']

                try:
                    dataset = Dataset.objects.get(slug=dataset_slug)
                # In the event that stale data exists in Solr, skip this dataset,
                # request the invalid data be purged and return the other results.
                # Pagination may be wrong, but this is the most functional solution. (#793)
                except Dataset.DoesNotExist:
                    PurgeDataTask.apply_async(args=[dataset_slug])
                    solr.delete(settings.SOLR_DATASETS_CORE,
                                'slug:%s' % dataset_slug)

                    page['meta']['total_count'] -= 1

                    continue

                dataset_resource = DatasetResource()
                dataset_bundle = dataset_resource.build_bundle(obj=dataset,
                                                               request=request)
                dataset_bundle = dataset_resource.full_dehydrate(
                    dataset_bundle)
                dataset_bundle = dataset_resource.simplify_bundle(
                    dataset_bundle)

                objects = [SolrObject(obj) for obj in results['docs']]

                dataset_search_url = reverse('api_dataset_data_list',
                                             kwargs={
                                                 'api_name':
                                                 self._meta.api_name,
                                                 'dataset_resource_name':
                                                 'dataset',
                                                 'resource_name': 'data',
                                                 'dataset_slug': dataset.slug
                                             })

                data_page = PandaPaginator(
                    {
                        'limit': str(group_limit),
                        'offset': str(group_offset),
                        'q': query
                    },
                    objects,
                    resource_uri=dataset_search_url,
                    count=results['numFound']).page()

                dataset_bundle.data.update(data_page)
                dataset_bundle.data['objects'] = []

                for obj in objects:
                    data_bundle = self.build_bundle(obj=obj, request=request)
                    data_bundle = self.full_dehydrate(data_bundle)
                    dataset_bundle.data['objects'].append(data_bundle)

                datasets.append(dataset_bundle.data)

            page['objects'] = datasets

            # Log query
            SearchLog.objects.create(user=user, dataset=None, query=query)

        self.log_throttled_access(request)

        if export:
            return self.create_response(request, 'Export queued.')
        else:
            return self.create_response(request, page)
Example #10
File: data.py Project: Rawadx/panda
    def search_all_data(self, request, **kwargs):
        """
        List endpoint using Solr. Provides full-text search via the "q" parameter.
        """
        self.method_check(request, allowed=['get'])
        self.is_authenticated(request)
        self.throttle_check(request)

        try:
            query = '(%s)' % request.GET['q']
        except KeyError:
            query = ''

        category = request.GET.get('category', '')
        since = request.GET.get('since', None)
        limit = int(request.GET.get('limit', settings.PANDA_DEFAULT_SEARCH_GROUPS))
        offset = int(request.GET.get('offset', 0))
        group_limit = int(request.GET.get('group_limit', settings.PANDA_DEFAULT_SEARCH_ROWS_PER_GROUP))
        group_offset = int(request.GET.get('group_offset', 0))
        export = bool(request.GET.get('export', False))

        # Keep only non-empty clauses so the AND-join below can't emit a
        # leading " AND " when "q" is absent.
        solr_query_bits = [query] if query else []

        if category:
            if category != 'uncategorized':
                category = Category.objects.get(slug=category)
                dataset_slugs = category.datasets.values_list('slug', flat=True)
            else:
                dataset_slugs = Dataset.objects.filter(categories=None).values_list('slug', flat=True) 

            solr_query_bits.append('dataset_slug:(%s)' % ' '.join(dataset_slugs))

        if since:
            solr_query_bits.append('last_modified:[' + since + 'Z TO *]')

        # Because users may have authenticated via headers, the request.user may
        # not be a full User instance. To be sure, we fetch one.
        user = UserProxy.objects.get(id=request.user.id)

        if export:
            task_type = ExportSearchTask

            task = TaskStatus.objects.create(
                task_name=task_type.name,
                task_description=_('Export search results for "%s".') % query,
                creator=user
            )

            task_type.apply_async(
                args=[query, task.id],
                kwargs={},
                task_id=task.id
            )
        else:
            response = solr.query_grouped(
                settings.SOLR_DATA_CORE,
                ' AND '.join(solr_query_bits),
                'dataset_slug',
                offset=offset,
                limit=limit,
                group_limit=group_limit,
                group_offset=group_offset
            )
            groups = response['grouped']['dataset_slug']['groups']

            page = PandaPaginator(
                request.GET,
                groups,
                resource_uri=request.path_info,
                count=response['grouped']['dataset_slug']['ngroups']
            ).page()

            datasets = []

            for group in groups:
                dataset_slug = group['groupValue']
                results = group['doclist']
                
                try:
                    dataset = Dataset.objects.get(slug=dataset_slug)
                # In the event that stale data exists in Solr, skip this dataset,
                # request the invalid data be purged and return the other results.
                # Pagination may be wrong, but this is the most functional solution. (#793)
                except Dataset.DoesNotExist:
                    PurgeDataTask.apply_async(args=[dataset_slug])
                    solr.delete(settings.SOLR_DATASETS_CORE, 'slug:%s' % dataset_slug)

                    page['meta']['total_count'] -= 1

                    continue
                
                dataset_resource = DatasetResource()
                dataset_bundle = dataset_resource.build_bundle(obj=dataset, request=request)
                dataset_bundle = dataset_resource.full_dehydrate(dataset_bundle)
                dataset_bundle = dataset_resource.simplify_bundle(dataset_bundle)

                objects = [SolrObject(obj) for obj in results['docs']]
                
                dataset_search_url = reverse('api_dataset_data_list', kwargs={
                    'api_name': self._meta.api_name,
                    'dataset_resource_name': 'dataset',
                    'resource_name': 'data',
                    'dataset_slug': dataset.slug
                })

                data_page = PandaPaginator(
                    { 'limit': str(group_limit), 'offset': str(group_offset), 'q': query },
                    objects,
                    resource_uri=dataset_search_url,
                    count=results['numFound']
                ).page()

                dataset_bundle.data.update(data_page)
                dataset_bundle.data['objects'] = []

                for obj in objects:
                    data_bundle = self.build_bundle(obj=obj, request=request)
                    data_bundle = self.full_dehydrate(data_bundle)
                    dataset_bundle.data['objects'].append(data_bundle)

                datasets.append(dataset_bundle.data)

            page['objects'] = datasets
            
            # Log query
            SearchLog.objects.create(user=user, dataset=None, query=query)

        self.log_throttled_access(request)

        if export:
            return self.create_response(request, _('Export queued.'))
        else:
            return self.create_response(request, page)
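Unlike Examples #8 and #9, which splice filters into the query string in place, this variant collects independent clauses and joins them with AND, keeping the grouping explicit. An illustrative composition (all values are made up):

# Illustrative only -- how the AND-join assembles the final Solr query.
solr_query_bits = [
    '(smith)',
    'dataset_slug:(crime-reports)',
    'last_modified:[2012-01-01T00:00:00Z TO *]',
]
print(' AND '.join(solr_query_bits))
# (smith) AND dataset_slug:(crime-reports) AND last_modified:[2012-01-01T00:00:00Z TO *]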