def assert_supersearch_no_errors():
    """Make sure an uncached SuperSearch query doesn't have any errors"""
    supersearch = SuperSearch()
    # We don't want any caching this time
    supersearch.cache_seconds = 0
    results = supersearch.get(
        product=settings.DEFAULT_PRODUCT,
        _results_number=1,
        _columns=['uuid'],
        _facets_size=1,
    )
    assert not results['errors'], results['errors']
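# A minimal usage sketch (hypothetical): wiring assert_supersearch_no_errors()
# into a Django health-check view. The view name and the idea that it is
# exposed at a monitoring endpoint are assumptions for illustration only;
# only assert_supersearch_no_errors() itself comes from the code above.
from django import http


def supersearch_healthcheck(request):  # hypothetical view name
    try:
        assert_supersearch_no_errors()
    except AssertionError as exc:
        # Surface the SuperSearch errors so a monitoring system can alert.
        return http.JsonResponse({'ok': False, 'errors': str(exc)}, status=500)
    return http.JsonResponse({'ok': True})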
def get(self, **kwargs):
    form = forms.NewSignaturesForm(kwargs)
    if not form.is_valid():
        return http.JsonResponse({"errors": form.errors}, status=400)

    start_date = form.cleaned_data["start_date"]
    end_date = form.cleaned_data["end_date"]
    not_after = form.cleaned_data["not_after"]
    product = form.cleaned_data["product"] or productlib.get_default_product().name

    # Fill in default values for all date parameters.
    if not end_date:
        end_date = datetime.datetime.utcnow().date() + datetime.timedelta(days=1)
    if not start_date:
        start_date = end_date - datetime.timedelta(days=8)
    if not not_after:
        not_after = start_date - datetime.timedelta(days=14)

    api = SuperSearch()
    signatures_number = 100

    # First, get a list of the top signatures that appeared during
    # the period we are interested in.
    params = {
        "product": product,
        "version": form.cleaned_data["version"],
        "date": [">=" + start_date.isoformat(), "<" + end_date.isoformat()],
        "_facets": "signature",
        "_facets_size": signatures_number,
        "_results_number": 0,
    }
    data = api.get(**params)

    signatures = []
    for signature in data["facets"]["signature"]:
        signatures.append(signature["term"])

    # Now verify whether those signatures also appeared during some
    # previous period of time.
    params["date"] = [
        ">=" + not_after.isoformat(),
        "<" + start_date.isoformat(),
    ]

    # Filter on exactly the signatures that we have.
    params["signature"] = ["=" + x for x in signatures]

    data = api.get(**params)

    # If any of those signatures shows up in these results, it already
    # existed before the period we are interested in, so it is not new.
    # Remove it from the list of new signatures.
    for signature in data["facets"]["signature"]:
        if signature["term"] in signatures:
            signatures.remove(signature["term"])

    # All remaining signatures are "new" ones.
    return {"hits": signatures, "total": len(signatures)}
def graphics_report(request):
    """Return a CSV output of all crashes for a particular day and a
    particular product."""
    if (not request.user.is_active or
            not request.user.has_perm('crashstats.run_long_queries')):
        return http.HttpResponseForbidden(
            "You must have the 'Run long queries' permission")

    form = forms.GraphicsReportForm(request.GET)
    if not form.is_valid():
        return http.HttpResponseBadRequest(str(form.errors))

    batch_size = 1000
    product = form.cleaned_data['product'] or settings.DEFAULT_PRODUCT
    date = form.cleaned_data['date']
    params = {
        'product': product,
        'date': [
            '>={}'.format(date.strftime('%Y-%m-%d')),
            '<{}'.format(
                (date + datetime.timedelta(days=1)).strftime('%Y-%m-%d'))
        ],
        '_columns': (
            'signature',
            'uuid',
            'date',
            'product',
            'version',
            'build_id',
            'platform',
            'platform_version',
            'cpu_name',
            'cpu_info',
            'address',
            'uptime',
            'topmost_filenames',
            'reason',
            'app_notes',
            'release_channel',
        ),
        '_results_number': batch_size,
        '_results_offset': 0,
    }

    api = SuperSearch()
    # Do the first query. That'll give us the total and the first page's
    # worth of crashes.
    data = api.get(**params)
    assert 'hits' in data

    accept_gzip = 'gzip' in request.META.get('HTTP_ACCEPT_ENCODING', '')
    response = http.HttpResponse(content_type='text/csv')
    out = BytesIO()
    writer = utils.UnicodeWriter(out, delimiter='\t')
    writer.writerow(GRAPHICS_REPORT_HEADER)
    pages = data['total'] // batch_size
    # if there is a remainder, add one more page
    if data['total'] % batch_size:
        pages += 1
    alias = {
        'crash_id': 'uuid',
        'os_name': 'platform',
        'os_version': 'platform_version',
        'date_processed': 'date',
        'build': 'build_id',
        'uptime_seconds': 'uptime',
    }
    # Make sure we don't have an alias for a header we don't need
    alias_excess = set(alias.keys()) - set(GRAPHICS_REPORT_HEADER)
    if alias_excess:
        raise ValueError('Not all keys in the map of aliases are in '
                         'the header ({!r})'.format(alias_excess))

    def get_value(row, key):
        """Return the appropriate output from the row of data, one key
        at a time. The output is what's used in writing the CSV file.

        The reason for doing these "hacks" is to match what used to be
        done with the SELECT statement in SQL in the ancient, but now
        replaced, report.
        """
        value = row.get(alias.get(key, key))
        if key == 'cpu_info':
            value = '{cpu_name} | {cpu_info}'.format(
                cpu_name=row.get('cpu_name', ''),
                cpu_info=row.get('cpu_info', ''),
            )
        if value is None:
            return ''
        if key == 'date_processed':
            value = timezone.make_aware(
                datetime.datetime.strptime(
                    value.split('.')[0], '%Y-%m-%dT%H:%M:%S'))
            value = value.strftime('%Y%m%d%H%M')
        if key == 'uptime_seconds' and value == 0:
            value = ''
        return value

    for page in range(pages):
        if page > 0:
            params['_results_offset'] = batch_size * page
            data = api.get(**params)

        for row in data['hits']:
            # Each row is a dict; turn it into a list in the exact order
            # of the GRAPHICS_REPORT_HEADER tuple above.
            # However, because the csv writer module doesn't "understand"
            # Python's None, we replace those with '' so the CSV doesn't
            # contain the word 'None' where the data is None.
            writer.writerow(
                [get_value(row, x) for x in GRAPHICS_REPORT_HEADER])

    payload = out.getvalue()
    if accept_gzip:
        zbuffer = BytesIO()
        zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuffer)
        zfile.write(payload)
        zfile.close()
        compressed_payload = zbuffer.getvalue()
        response.write(compressed_payload)
        response['Content-Length'] = len(compressed_payload)
        response['Content-Encoding'] = 'gzip'
    else:
        response.write(payload)
        response['Content-Length'] = len(payload)
    return response
def get(self, **kwargs):
    form = forms.NewSignaturesForm(kwargs)
    if not form.is_valid():
        return http.JsonResponse({'errors': form.errors}, status=400)

    start_date = form.cleaned_data['start_date']
    end_date = form.cleaned_data['end_date']
    not_after = form.cleaned_data['not_after']
    product = form.cleaned_data['product'] or settings.DEFAULT_PRODUCT

    # Fill in default values for all date parameters.
    if not end_date:
        end_date = (datetime.datetime.utcnow().date() +
                    datetime.timedelta(days=1))
    if not start_date:
        start_date = end_date - datetime.timedelta(days=8)
    if not not_after:
        not_after = start_date - datetime.timedelta(days=14)

    api = SuperSearch()
    signatures_number = 100

    # First, get a list of the top signatures that appeared during
    # the period we are interested in.
    params = {
        'product': product,
        'version': form.cleaned_data['version'],
        'date': [
            '>=' + start_date.isoformat(),
            '<' + end_date.isoformat(),
        ],
        '_facets': 'signature',
        '_facets_size': signatures_number,
        '_results_number': 0,
    }
    data = api.get(**params)

    signatures = []
    for signature in data['facets']['signature']:
        signatures.append(signature['term'])

    # Now verify whether those signatures also appeared during some
    # previous period of time.
    params['date'] = [
        '>=' + not_after.isoformat(),
        '<' + start_date.isoformat(),
    ]

    # Filter on exactly the signatures that we have.
    params['signature'] = ['=' + x for x in signatures]

    data = api.get(**params)

    # If any of those signatures shows up in these results, it already
    # existed before the period we are interested in, so it is not new.
    # Remove it from the list of new signatures.
    for signature in data['facets']['signature']:
        if signature['term'] in signatures:
            signatures.remove(signature['term'])

    # All remaining signatures are "new" ones.
    return {'hits': signatures, 'total': len(signatures)}
def get(self, **kwargs):
    form = forms.NewSignaturesForm(kwargs)
    if not form.is_valid():
        return http.JsonResponse({
            'errors': form.errors
        }, status=400)

    start_date = form.cleaned_data['start_date']
    end_date = form.cleaned_data['end_date']
    not_after = form.cleaned_data['not_after']
    product = form.cleaned_data['product'] or settings.DEFAULT_PRODUCT

    # Fill in default values for all date parameters.
    if not end_date:
        end_date = (
            datetime.datetime.utcnow().date() + datetime.timedelta(days=1)
        )
    if not start_date:
        start_date = end_date - datetime.timedelta(days=8)
    if not not_after:
        not_after = start_date - datetime.timedelta(days=14)

    api = SuperSearch()
    signatures_number = 100

    # First, get a list of the top signatures that appeared during
    # the period we are interested in.
    params = {
        'product': product,
        'version': form.cleaned_data['version'],
        'date': [
            '>=' + start_date.isoformat(),
            '<' + end_date.isoformat(),
        ],
        '_facets': 'signature',
        '_facets_size': signatures_number,
        '_results_number': 0,
    }
    data = api.get(**params)

    signatures = []
    for signature in data['facets']['signature']:
        signatures.append(signature['term'])

    # Now verify whether those signatures also appeared during some
    # previous period of time.
    params['date'] = [
        '>=' + not_after.isoformat(),
        '<' + start_date.isoformat(),
    ]

    # Filter on exactly the signatures that we have.
    params['signature'] = ['=' + x for x in signatures]

    data = api.get(**params)

    # If any of those signatures shows up in these results, it already
    # existed before the period we are interested in, so it is not new.
    # Remove it from the list of new signatures.
    for signature in data['facets']['signature']:
        if signature['term'] in signatures:
            signatures.remove(signature['term'])

    # All remaining signatures are "new" ones.
    return {
        'hits': signatures,
        'total': len(signatures)
    }
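# Sketch of the core "new signatures" idea used by get() above, independent
# of SuperSearch: take the top signature terms from the recent window and
# drop any that also appear in the earlier comparison window. The helper
# name and the signature terms below are made up for illustration; the real
# code does the equivalent with two faceted SuperSearch queries.
def new_signatures(recent_terms, earlier_terms):
    earlier = set(earlier_terms)
    # Keep the recent ordering, which mirrors list.remove() in get() above.
    return [term for term in recent_terms if term not in earlier]


assert new_signatures(
    ['sig_a', 'sig_b', 'sig_c'],   # facet terms from [start_date, end_date)
    ['sig_b'],                     # facet terms from [not_after, start_date)
) == ['sig_a', 'sig_c']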
def handle(self, **options):
    start_datetime = options.get("last_success")
    end_datetime = options.get("run_time")

    if end_datetime:
        end_datetime = parse_datetime(end_datetime)
    else:
        end_datetime = timezone.now()

    if start_datetime:
        start_datetime = parse_datetime(start_datetime)
        # When run via cronrun, start_datetime is based on the last success
        # and we want to increase the window by 10 minutes to get some
        # overlap with the previous run
        start_datetime = start_datetime - datetime.timedelta(minutes=10)
    else:
        # Default to end_datetime - 90 minutes
        start_datetime = end_datetime - datetime.timedelta(minutes=90)

    # Truncate seconds and microseconds
    start_datetime = start_datetime.replace(second=0, microsecond=0)
    end_datetime = end_datetime.replace(second=0, microsecond=0)

    if not end_datetime > start_datetime:
        raise CommandError("start time must be before end time.")

    # Do a super search and get the signature, buildid, and date processed
    # for every crash in the range
    all_fields = SuperSearchFieldsData().get()
    api = SuperSearch()

    self.stdout.write("Looking at %s to %s" % (start_datetime, end_datetime))

    params = {
        "date": [
            f">={start_datetime.isoformat()}",
            f"<{end_datetime.isoformat()}",
        ],
        "_columns": ["signature", "build_id", "date"],
        "_facets_size": 0,
        "_fields": all_fields,
        # Set up first page
        "_results_offset": 0,
        "_results_number": MAX_PAGE,
    }

    results = {}
    crashids_count = 0

    while True:
        resp = api.get(**params)
        hits = resp["hits"]
        for hit in hits:
            crashids_count += 1

            if not hit["build_id"]:
                # Not all crashes have a build id, so skip the ones that don't.
                continue

            if hit["signature"] in results:
                data = results[hit["signature"]]
                data["build_id"] = min(data["build_id"], hit["build_id"])
                data["date"] = min(data["date"], hit["date"])
            else:
                data = {
                    "signature": hit["signature"],
                    "build_id": hit["build_id"],
                    "date": hit["date"],
                }
            results[hit["signature"]] = data

        # If there are no more crash ids to get, stop paging
        total = resp["total"]
        if not hits or crashids_count >= total:
            break

        # Get the next page, but only as many results as we need
        params["_results_offset"] += MAX_PAGE
        params["_results_number"] = min(
            # MAX_PAGE is the maximum we can request
            MAX_PAGE,
            # The number of results Super Search hasn't returned to us yet
            total - crashids_count,
        )

    signature_data = results.values()

    # Save signature data to the db
    for item in signature_data:
        if options["dry_run"]:
            self.stdout.write(
                "Inserting/updating signature (%s, %s, %s)"
                % (item["signature"], item["date"], item["build_id"])
            )
        else:
            self.update_crashstats_signature(
                signature=item["signature"],
                report_date=item["date"],
                report_build=item["build_id"],
            )

    self.stdout.write("Inserted/updated %d signatures." % len(signature_data))
def handle(self, **options):
    start_datetime = options.get('last_success')
    end_datetime = options.get('run_time')

    if end_datetime:
        end_datetime = parse_datetime(end_datetime)
    else:
        end_datetime = timezone.now()

    if start_datetime:
        start_datetime = parse_datetime(start_datetime)
        # When run via cronrun, start_datetime is based on the last success
        # and we want to increase the window by 10 minutes to get some
        # overlap with the previous run
        start_datetime = start_datetime - datetime.timedelta(minutes=10)
    else:
        # Default to end_datetime - 90 minutes
        start_datetime = end_datetime - datetime.timedelta(minutes=90)

    # Truncate seconds and microseconds
    start_datetime = start_datetime.replace(second=0, microsecond=0)
    end_datetime = end_datetime.replace(second=0, microsecond=0)

    if not end_datetime > start_datetime:
        raise CommandError('start time must be before end time.')

    # Do a super search and get the signature, buildid, and date processed
    # for every crash in the range
    all_fields = SuperSearchFieldsData().get()
    api = SuperSearch()

    self.stdout.write('Looking at %s to %s' % (start_datetime, end_datetime))

    params = {
        'date': [
            '>={}'.format(start_datetime.isoformat()),
            '<{}'.format(end_datetime.isoformat()),
        ],
        '_columns': ['signature', 'build_id', 'date'],
        '_facets_size': 0,
        '_fields': all_fields,
        # Set up first page
        '_results_offset': 0,
        '_results_number': MAX_PAGE,
    }

    results = {}
    crashids_count = 0

    while True:
        resp = api.get(**params)
        hits = resp['hits']
        for hit in hits:
            crashids_count += 1

            if not hit['build_id']:
                # Not all crashes have a build id, so skip the ones that don't.
                continue

            if hit['signature'] in results:
                data = results[hit['signature']]
                data['build_id'] = min(data['build_id'], hit['build_id'])
                data['date'] = min(data['date'], hit['date'])
            else:
                data = {
                    'signature': hit['signature'],
                    'build_id': hit['build_id'],
                    'date': hit['date']
                }
            results[hit['signature']] = data

        # If there are no more crash ids to get, stop paging
        total = resp['total']
        if not hits or crashids_count >= total:
            break

        # Get the next page, but only as many results as we need
        params['_results_offset'] += MAX_PAGE
        params['_results_number'] = min(
            # MAX_PAGE is the maximum we can request
            MAX_PAGE,
            # The number of results Super Search hasn't returned to us yet
            total - crashids_count
        )

    signature_data = results.values()

    # Save signature data to the db
    for item in signature_data:
        if options['dry_run']:
            self.stdout.write(
                'Inserting/updating signature (%s, %s, %s)' %
                (item['signature'], item['date'], item['build_id'])
            )
        else:
            self.update_crashstats_signature(
                signature=item['signature'],
                report_date=item['date'],
                report_build=item['build_id'],
            )

    self.stdout.write('Inserted/updated %d signatures.' % len(signature_data))
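# Sketch of the paging pattern used in handle() above, written against a
# stand-in for the SuperSearch API so it can run on its own. MAX_PAGE's
# value, the FakeSuperSearch class, and fetch_all() are assumptions for
# illustration; only the offset/number paging logic mirrors the command.
MAX_PAGE = 1000


class FakeSuperSearch:
    """Stand-in that serves slices of a fixed result set like a paged API."""

    def __init__(self, rows):
        self.rows = rows

    def get(self, **params):
        offset = params['_results_offset']
        number = params['_results_number']
        return {
            'hits': self.rows[offset:offset + number],
            'total': len(self.rows),
        }


def fetch_all(api):
    params = {'_results_offset': 0, '_results_number': MAX_PAGE}
    collected = []
    while True:
        resp = api.get(**params)
        hits = resp['hits']
        collected.extend(hits)
        total = resp['total']
        # Stop when the API runs dry or we've seen everything it reported.
        if not hits or len(collected) >= total:
            break
        # Ask only for as many results as are still missing.
        params['_results_offset'] += MAX_PAGE
        params['_results_number'] = min(MAX_PAGE, total - len(collected))
    return collected


assert len(fetch_all(FakeSuperSearch(list(range(2500))))) == 2500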
def graphics_report(request):
    """Return a CSV output of all crashes for a particular day and a
    particular product."""
    if (
        not request.user.is_active or
        not request.user.has_perm('crashstats.run_long_queries')
    ):
        return http.HttpResponseForbidden(
            "You must have the 'Run long queries' permission"
        )

    form = forms.GraphicsReportForm(
        request.GET,
    )
    if not form.is_valid():
        return http.HttpResponseBadRequest(str(form.errors))

    batch_size = 1000
    product = form.cleaned_data['product'] or settings.DEFAULT_PRODUCT
    date = form.cleaned_data['date']
    params = {
        'product': product,
        'date': [
            '>={}'.format(date.strftime('%Y-%m-%d')),
            '<{}'.format(
                (date + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
            )
        ],
        '_columns': (
            'signature',
            'uuid',
            'date',
            'product',
            'version',
            'build_id',
            'platform',
            'platform_version',
            'cpu_name',
            'cpu_info',
            'address',
            'uptime',
            'topmost_filenames',
            'reason',
            'app_notes',
            'release_channel',
        ),
        '_results_number': batch_size,
        '_results_offset': 0,
    }

    api = SuperSearch()
    # Do the first query. That'll give us the total and the first page's
    # worth of crashes.
    data = api.get(**params)
    assert 'hits' in data

    accept_gzip = 'gzip' in request.META.get('HTTP_ACCEPT_ENCODING', '')
    response = http.HttpResponse(content_type='text/csv')
    out = BytesIO()
    writer = utils.UnicodeWriter(out, delimiter='\t')
    writer.writerow(GRAPHICS_REPORT_HEADER)
    pages = data['total'] // batch_size
    # if there is a remainder, add one more page
    if data['total'] % batch_size:
        pages += 1
    alias = {
        'crash_id': 'uuid',
        'os_name': 'platform',
        'os_version': 'platform_version',
        'date_processed': 'date',
        'build': 'build_id',
        'uptime_seconds': 'uptime',
    }
    # Make sure we don't have an alias for a header we don't need
    alias_excess = set(alias.keys()) - set(GRAPHICS_REPORT_HEADER)
    if alias_excess:
        raise ValueError(
            'Not all keys in the map of aliases are in '
            'the header ({!r})'.format(alias_excess)
        )

    def get_value(row, key):
        """Return the appropriate output from the row of data, one key
        at a time. The output is what's used in writing the CSV file.

        The reason for doing these "hacks" is to match what used to be
        done with the SELECT statement in SQL in the ancient, but now
        replaced, report.
        """
        value = row.get(alias.get(key, key))
        if key == 'cpu_info':
            value = '{cpu_name} | {cpu_info}'.format(
                cpu_name=row.get('cpu_name', ''),
                cpu_info=row.get('cpu_info', ''),
            )
        if value is None:
            return ''
        if key == 'date_processed':
            value = timezone.make_aware(datetime.datetime.strptime(
                value.split('.')[0], '%Y-%m-%dT%H:%M:%S'
            ))
            value = value.strftime('%Y%m%d%H%M')
        if key == 'uptime_seconds' and value == 0:
            value = ''
        return value

    for page in range(pages):
        if page > 0:
            params['_results_offset'] = batch_size * page
            data = api.get(**params)

        for row in data['hits']:
            # Each row is a dict; turn it into a list in the exact order
            # of the GRAPHICS_REPORT_HEADER tuple above.
            # However, because the csv writer module doesn't "understand"
            # Python's None, we replace those with '' so the CSV doesn't
            # contain the word 'None' where the data is None.
            writer.writerow([
                get_value(row, x) for x in GRAPHICS_REPORT_HEADER
            ])

    payload = out.getvalue()
    if accept_gzip:
        zbuffer = BytesIO()
        zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuffer)
        zfile.write(payload)
        zfile.close()
        compressed_payload = zbuffer.getvalue()
        response.write(compressed_payload)
        response['Content-Length'] = len(compressed_payload)
        response['Content-Encoding'] = 'gzip'
    else:
        response.write(payload)
        response['Content-Length'] = len(payload)
    return response
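# Sketch of the gzip branch at the end of graphics_report() above, pulled out
# so it runs on its own: compress a payload in memory with gzip.GzipFile the
# same way the view does before writing it to the response. The payload bytes
# here are made-up example data, not real crash rows.
import gzip
from io import BytesIO

payload = b'signature\tuuid\ndummy_signature\t00000000-0000-0000-0000-000000000000\n'

zbuffer = BytesIO()
zfile = gzip.GzipFile(mode='wb', compresslevel=6, fileobj=zbuffer)
zfile.write(payload)
zfile.close()
compressed_payload = zbuffer.getvalue()

# Round-trip check: decompressing gives back the original CSV bytes.
assert gzip.decompress(compressed_payload) == payload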