Example #1
    def test_basic(self):
        total = 10

        for _ in range(total):
            self.create_user()

        qs = User.objects.all()

        assert len(list(RangeQuerySetWrapper(qs, step=2))) == total
        assert len(list(RangeQuerySetWrapper(qs, limit=5))) == 5
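These two tests exercise the knobs used throughout the examples on this page: `step` controls the batch size and `limit` caps the total number of rows yielded. As a minimal sketch of the keyset pagination the wrapper performs (a simplified stand-in, not Sentry's actual implementation), it fetches `step`-sized slices ordered by primary key and advances a cursor past the last id seen, which is also why Example #10 below can safely delete rows while iterating:

def range_iter(queryset, step=1000, limit=None):
    # Keyset pagination: order by pk and advance a cursor past the last row
    # seen, so in-flight deletes don't shift the window the way OFFSET would.
    seen = 0
    cursor = None
    while True:
        qs = queryset.order_by("pk")
        if cursor is not None:
            qs = qs.filter(pk__gt=cursor)
        batch = list(qs[:step])
        if not batch:
            return
        for obj in batch:
            yield obj
            seen += 1
            if limit is not None and seen >= limit:
                return
        cursor = batch[-1].pk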
Example #2
def cleanup_event_attachment_files(apps, schema_editor):
    """
    Previously, cleanup task code did a SQL bulk delete on EventAttachment
    leaving orphaned File and FileBlob objects. These orphaned files now need to
    be purged as they are still consuming space.
    """
    EventAttachment = apps.get_model("sentry", "EventAttachment")
    File = apps.get_model("sentry", "File")

    # Find the oldest live attachment as we only want to purge old files.
    # If there are no files, skip everything.
    oldest_attachment = EventAttachment.objects.all().aggregate(Min("date_added"))
    if not oldest_attachment or oldest_attachment["date_added__min"] is None:
        return

    # File types used in event attachments.
    attachment_types = [
        "event.applecrashreport",
        "event.attachment",
        "event.payload",
        "event.minidump",
        "unreal.context",
        "unreal.logs",
    ]
    file_query = File.objects.filter(timestamp__lt=oldest_attachment["date_added__min"]).filter(
        type__in=attachment_types
    )

    for f in RangeQuerySetWrapper(file_query):
        # Double check that the file is not referenced.
        if not EventAttachment.objects.filter(file=f).exists():
            f.delete()
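Functions with this `(apps, schema_editor)` signature run inside a Django data migration; `apps.get_model` is used instead of direct imports so the historical model state is loaded. A hedged sketch of the wiring (the dependency tuple is a placeholder, not the real migration name):

from django.db import migrations

class Migration(migrations.Migration):
    dependencies = [("sentry", "0001_initial")]  # placeholder dependency

    operations = [
        migrations.RunPython(cleanup_event_attachment_files,
                             migrations.RunPython.noop),
    ]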
Example #3
def prepare_reports(dry_run=False, *args, **kwargs):
    timestamp, duration = _fill_default_parameters(*args, **kwargs)

    logger.info("reports.begin_prepare_report")

    organizations = _get_organization_queryset().values_list("id", flat=True)
    for i, organization_id in enumerate(
            RangeQuerySetWrapper(organizations,
                                 step=10000,
                                 result_value_getter=lambda item: item)):
        prepare_organization_report.delay(timestamp,
                                          duration,
                                          organization_id,
                                          dry_run=dry_run)
        if i % 10000 == 0:
            logger.info(
                "reports.scheduled_prepare_organization_report",
                extra={
                    "organization_id": organization_id,
                    "total_scheduled": i
                },
            )

    default_cache.set(prepare_reports_verify_key(), "1",
                      int(timedelta(days=3).total_seconds()))
    logger.info("reports.finish_prepare_report")
Example #4
def backfill_group_ids(model):
    query = model.objects.filter(group_id__isnull=True)

    for attachment in RangeQuerySetWrapper(query, step=1000):
        event = eventstore.get_event_by_id(attachment.project_id, attachment.event_id)
        if event:
            model.objects.filter(id=attachment.id).update(group_id=event.group_id)
Example #5
def cleanup_unused_files(quiet=False):
    """
    Remove FileBlob's (and thus the actual files) if they are no longer
    referenced by any File.

    We set a minimum-age on the query to ensure that we don't try to remove
    any blobs which are brand new and potentially in the process of being
    referenced.
    """
    from sentry.models import File, FileBlob, FileBlobIndex

    if quiet:
        from sentry.utils.query import RangeQuerySetWrapper
    else:
        from sentry.utils.query import RangeQuerySetWrapperWithProgressBar as RangeQuerySetWrapper

    cutoff = timezone.now() - timedelta(days=1)
    queryset = FileBlob.objects.filter(timestamp__lte=cutoff)

    for blob in RangeQuerySetWrapper(queryset):
        if FileBlobIndex.objects.filter(blob=blob).exists():
            continue
        if File.objects.filter(blob=blob).exists():
            continue
        blob.delete()
Example #6
def repair_callsigns():
    from sentry.utils.query import RangeQuerySetWrapperWithProgressBar, \
        RangeQuerySetWrapper
    from sentry.models.counter import increment_project_counter
    from sentry.models import Organization, Group, Project

    click.echo('Repairing callsigns')

    queryset = Organization.objects.all()

    for org in RangeQuerySetWrapperWithProgressBar(queryset):
        projects = list(org.project_set.all())
        callsigns = get_callsigns(projects)
        for project in projects:
            if project.callsign is None:
                Project.objects.filter(
                    pk=project.id,
                    callsign=None).update(callsign=callsigns[project.id])
            q = Group.objects.filter(
                project=project,
                short_id=None,
            )
            for group in RangeQuerySetWrapper(q):
                with catchable_atomic():
                    pending_short_id = increment_project_counter(project)
                    updated = Group.objects.filter(
                        pk=group.id,
                        short_id=None).update(short_id=pending_short_id)
                    if updated == 0:
                        raise RollbackLocally()
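`catchable_atomic` and `RollbackLocally` are Sentry-internal helpers; a plausible sketch of the pair built from standard Django savepoints (an assumption about their implementation, not the actual source):

from contextlib import contextmanager
from django.db import transaction

class RollbackLocally(Exception):
    pass

@contextmanager
def catchable_atomic():
    # Raising RollbackLocally inside the block rolls back the savepoint
    # opened by transaction.atomic(), and the exception is swallowed here
    # so the caller simply moves on to the next iteration.
    try:
        with transaction.atomic():
            yield
    except RollbackLocally:
        pass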
Example #7
    def handle(self, **options):
        def _attach_fks(_events):
            project_ids = set([event.project_id for event in _events])
            projects = {
                p.id: p
                for p in Project.objects.filter(id__in=project_ids)
            }
            group_ids = set([event.group_id for event in _events])
            groups = {g.id: g for g in Group.objects.filter(id__in=group_ids)}
            for event in _events:
                event.project = projects[event.project_id]
                event.group = groups[event.group_id]

        from sentry import eventstream
        from sentry.utils.query import RangeQuerySetWrapper

        from_ts = options['from_ts']
        to_ts = options['to_ts']
        from_id = options['from_id']
        to_id = options['to_id']

        if (from_ts or to_ts) and (from_id or to_id):
            raise CommandError(
                'You can either limit by primary key, or by timestamp.')
        elif from_ts and to_ts:
            events = self.get_events_by_timestamp(from_ts, to_ts)
        elif from_id and to_id:
            events = self.get_events_by_id(from_id, to_id)
        else:
            raise CommandError(
                'Invalid arguments: either use --from/--to-id, or --from/--to-ts.'
            )

        count = events.count()
        self.stdout.write('Events to process: {}\n'.format(count))

        if count == 0:
            self.stdout.write('Nothing to do.\n')
            sys.exit(0)

        if not options['no_input']:
            proceed = raw_input('Do you want to continue? [y/N] ')
            if proceed.lower() not in ['yes', 'y']:
                raise CommandError('Aborted.')

        for event in RangeQuerySetWrapper(events, callbacks=(_attach_fks, )):
            primary_hash = event.get_primary_hash()
            eventstream.insert(
                group=event.group,
                event=event,
                is_new=False,
                is_sample=False,
                is_regression=False,
                is_new_group_environment=False,
                primary_hash=primary_hash,
                skip_consume=True,
            )

        self.stdout.write('Done.\n')
Example #8
    def get_group_tag_value_iter(self, project_id, group_id, environment_id, key, callbacks=()):
        from sentry.utils.query import RangeQuerySetWrapper

        qs = self.get_group_tag_value_qs(
            project_id, group_id, environment_id, key
        )

        return RangeQuerySetWrapper(queryset=qs, callbacks=callbacks)
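The `callbacks` argument this helper forwards takes functions that receive each fetched batch before its rows are yielded, which is how Examples #7 and #24 attach foreign keys in bulk instead of per row. Extending the sketch shown after Example #1 (again a simplified stand-in, not the actual implementation):

def range_iter_with_callbacks(queryset, step=1000, callbacks=()):
    # Same keyset loop as before, plus a hook: every fetched batch is passed
    # to each callback before being yielded, so related objects can be
    # attached in bulk and N+1 queries avoided.
    cursor = None
    while True:
        qs = queryset.order_by("pk")
        if cursor is not None:
            qs = qs.filter(pk__gt=cursor)
        batch = list(qs[:step])
        if not batch:
            return
        for callback in callbacks:
            callback(batch)
        for obj in batch:
            yield obj
        cursor = batch[-1].pk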
Example #9
def delete_alert_rules_incidents(apps, schema_editor):
    from sentry.utils.query import RangeQuerySetWrapper

    Incident = apps.get_model("sentry", "Incident")
    AlertRule = apps.get_model("sentry", "AlertRule")
    TimeSeriesSnapshot = apps.get_model("sentry", "TimeSeriesSnapshot")
    QuerySubscription = apps.get_model("sentry", "QuerySubscription")

    for incident in RangeQuerySetWrapper(Incident.objects.all()):
        incident.delete()

    for alert_rule in RangeQuerySetWrapper(AlertRule.objects.all()):
        alert_rule.delete()

    for snapshot in RangeQuerySetWrapper(TimeSeriesSnapshot.objects.all()):
        snapshot.delete()

    for sub in RangeQuerySetWrapper(QuerySubscription.objects.all()):
        sub.delete()
Example #10
    def test_loop_and_delete(self):
        total = 10
        for _ in range(total):
            self.create_user()

        qs = User.objects.all()

        for user in RangeQuerySetWrapper(qs, step=2):
            user.delete()

        assert User.objects.all().count() == 0
Example #11
def prepare_reports(dry_run=False, *args, **kwargs):
    timestamp, duration = _fill_default_parameters(*args, **kwargs)

    logger.info("reports.begin_prepare_report")

    organizations = _get_organization_queryset()
    for organization in RangeQuerySetWrapper(organizations, step=10000):
        prepare_organization_report.delay(timestamp,
                                          duration,
                                          organization.id,
                                          dry_run=dry_run)
Example #12
def backfill_file_type(apps, schema_editor):
    """
    Fill the new EventAttachment.type column with values from EventAttachment.file.type.
    """
    EventAttachment = apps.get_model("sentry", "EventAttachment")
    all_event_attachments = EventAttachment.objects.select_related(
        "file").all()
    for event_attachment in RangeQuerySetWrapper(all_event_attachments,
                                                 step=1000):
        if event_attachment.type is None:
            event_attachment.type = event_attachment.file.type
            event_attachment.save(update_fields=["type"])
Example #13
    def iterator_generic(self, chunk_size):
        from sentry.utils.query import RangeQuerySetWrapper
        qs = self.get_generic_queryset()

        chunk = []
        for item in RangeQuerySetWrapper(qs):
            chunk.append(item.id)
            if len(chunk) == chunk_size:
                yield tuple(chunk)
                chunk = []
        if chunk:
            yield tuple(chunk)
Example #14
    def forwards(self, orm):
        from sentry.utils.query import RangeQuerySetWrapper

        queryset = orm['sentry.File'].objects.all()
        for file in RangeQuerySetWrapper(queryset):
            if file.size:
                continue
            orm['sentry.File'].objects.filter(id=file.id).update(
                size=sum([
                    fbi.blob.size
                    for fbi in orm['sentry.FileBlobIndex'].objects.filter(
                        file=file,
                    ).select_related('blob')
                ]),
            )
Example #15
def get_scim_teams_members(
    team_list: Sequence[Team],
) -> MutableMapping[Team, MutableSequence[MutableMapping[str, Any]]]:
    members = RangeQuerySetWrapper(
        OrganizationMember.objects.filter(teams__in=team_list)
        .select_related("user")
        .prefetch_related("teams")
        .distinct("id"),
        limit=10000,
    )
    member_map: MutableMapping[Team, MutableSequence[MutableMapping[str, Any]]] = defaultdict(list)
    for member in members:
        for team in member.teams.all():
            member_map[team].append({"value": str(member.id), "display": member.get_email()})
    return member_map
Example #16
def backfill_snuba_query_event_type(apps, schema_editor):
    """
    This backfills all SnubaQuery rows that don't have a `SnubaQueryEventType`.
    """
    SnubaQuery = apps.get_model("sentry", "SnubaQuery")
    SnubaQueryEventType = apps.get_model("sentry", "SnubaQueryEventType")

    for snuba_query in RangeQuerySetWrapper(SnubaQuery.objects.all()):
        if not SnubaQueryEventType.objects.filter(
                snuba_query=snuba_query).exists():
            # 0 is SnubaQueryEventType.EventTypes.ERROR,
            # 2 is SnubaQueryEventType.EventTypes.TRANSACTION.
            SnubaQueryEventType.objects.create(
                snuba_query=snuba_query,
                type=(0 if snuba_query.dataset == "events" else 2))
Example #17
    def get(self, request, organization, project, group_id, key):
        try:
            # TODO(tkaemming): This should *actually* redirect, see similar
            # comment in ``GroupEndpoint.convert_args``.
            group, _ = get_group_with_redirect(
                group_id,
                queryset=Group.objects.filter(project=project),
            )
        except Group.DoesNotExist:
            raise Http404

        if tagstore.is_reserved_key(key):
            lookup_key = 'sentry:{0}'.format(key)
        else:
            lookup_key = key

        try:
            environment_id = self._get_environment_id_from_request(
                request, project.organization_id)
        except Environment.DoesNotExist:
            # if the environment doesn't exist then the tag can't possibly exist
            raise Http404

        # validate existence as it may be deleted
        try:
            tagstore.get_tag_key(project.id, environment_id, lookup_key)
        except tagstore.TagKeyNotFound:
            raise Http404

        if key == 'user':
            callbacks = [attach_eventuser(project.id)]
        else:
            callbacks = []

        queryset = RangeQuerySetWrapper(
            tagstore.get_group_tag_value_qs(group.project_id, group.id,
                                            environment_id, lookup_key),
            callbacks=callbacks,
        )

        filename = '{}-{}'.format(
            group.qualified_short_id or group.id,
            key,
        )

        return self.to_csv_response(queryset, filename, key=key)
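`to_csv_response` is a helper on the endpoint class; since the wrapper is just an iterator, one plausible implementation streams rows without ever materializing the queryset (a sketch under that assumption, with illustrative column names):

import csv

from django.http import StreamingHttpResponse

class Echo:
    # File-like stub whose write() returns the value, so csv.writer output
    # can be yielded row by row instead of buffered.
    def write(self, value):
        return value

def to_csv_response(rows, filename, key):
    writer = csv.writer(Echo())

    def generate():
        yield writer.writerow([key, "times_seen"])  # illustrative header row
        for row in rows:
            yield writer.writerow([row.value, row.times_seen])

    response = StreamingHttpResponse(generate(), content_type="text/csv")
    response["Content-Disposition"] = 'attachment; filename="{}.csv"'.format(filename)
    return response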
Example #18
    def get(self, request, organization, project, team, group_id, key):
        try:
            # TODO(tkaemming): This should *actually* redirect, see similar
            # comment in ``GroupEndpoint.convert_args``.
            group, _ = get_group_with_redirect(
                group_id,
                queryset=Group.objects.filter(project=project),
            )
        except Group.DoesNotExist:
            raise Http404

        if TagKey.is_reserved_key(key):
            lookup_key = 'sentry:{0}'.format(key)
        else:
            lookup_key = key

        # validate existence as it may be deleted
        try:
            TagKey.objects.get(
                project_id=group.project_id,
                key=lookup_key,
                status=TagKeyStatus.VISIBLE,
            )
        except TagKey.DoesNotExist:
            raise Http404

        if key == 'user':
            callbacks = [attach_eventuser(project.id)]
        else:
            callbacks = []

        queryset = RangeQuerySetWrapper(
            GroupTagValue.objects.filter(
                group_id=group.id,
                key=lookup_key,
            ),
            callbacks=callbacks,
        )

        filename = '{}-{}'.format(
            group.qualified_short_id or group.id,
            key,
        )

        return self.to_csv_response(queryset, filename, key=key)
Example #19
def backfill_user_reports(apps, schema_editor):
    """
    Processes user reports that are missing event data, and adds the appropriate data
    if the event exists in Clickhouse.
    """
    UserReport = apps.get_model("sentry", "UserReport")

    user_reports = UserReport.objects.filter(group_id__isnull=True, environment_id__isnull=True)

    for report in RangeQuerySetWrapper(user_reports, step=1000):
        try:
            event = eventstore.get_event_by_id(report.project_id, report.event_id)
        except (SnubaError, QueryOutsideGroupActivityError, QueryOutsideRetentionError) as se:
            logger.warn(
                "failed to fetch event %s for project %d: %s"
                % (report.event_id, report.project_id, se)
            )
            continue

        if event:
            report.update(group_id=event.group_id, environment_id=event.get_environment().id)
Example #20
def cleanup(days=30, project=None, **kwargs):
    """
    Deletes a portion of the trailing data in Sentry based on
    their creation dates. For example, if ``days`` is 30, this
    would attempt to clean up all data that's older than 30 days.

    :param project: limit all deletion scopes to messages that are part
                    of the given project
    """
    import datetime

    from django.utils import timezone

    from sentry.models import (Group, Event, MessageCountByMinute,
                               MessageFilterValue, FilterKey, FilterValue,
                               ProjectCountByMinute, SearchDocument, Activity,
                               AffectedUserByGroup, LostPasswordHash)
    from sentry.utils.query import RangeQuerySetWrapper

    GENERIC_DELETES = (
        (SearchDocument, 'date_changed'),
        (MessageCountByMinute, 'date'),
        (ProjectCountByMinute, 'date'),
        (MessageFilterValue, 'last_seen'),
        (Event, 'datetime'),
        (Activity, 'datetime'),
        (AffectedUserByGroup, 'last_seen'),

        # Group should probably be last
        (Group, 'last_seen'),
    )

    log = cleanup.get_logger()

    ts = timezone.now() - datetime.timedelta(days=days)

    # Remove types which can easily be bound to project + date
    for model, date_col in GENERIC_DELETES:
        log.info("Removing %r for days=%s project=%r", model, days, project
                 or '*')
        qs = model.objects.filter(**{'%s__lte' % (date_col, ): ts})
        if project:
            qs = qs.filter(project=project)
        # XXX: we step through because the deletion collector will pull all relations into memory
        for obj in RangeQuerySetWrapper(qs):
            log.info("Removing %r", obj)
            obj.delete()

    log.info("Removing expired values for %r", LostPasswordHash)
    LostPasswordHash.objects.filter(date_added__lte=timezone.now() -
                                    datetime.timedelta(days=1)).delete()

    # We'll need this to confirm deletion of FilterKey and FilterValue objects.
    mqs = MessageFilterValue.objects.all()
    if project:
        mqs = mqs.filter(project=project)

    # FilterKey
    log.info("Removing %r for days=%s project=%r", FilterKey, days, project
             or '*')
    qs = FilterKey.objects.all()
    if project:
        qs = qs.filter(project=project)
    for obj in RangeQuerySetWrapper(qs):
        if not mqs.filter(key=obj.key).exists():
            log.info(
                "Removing unused filter %s=*",
                obj.key,
            )
            qs.filter(key=obj.key).delete()
            obj.delete()

    # FilterValue
    log.info("Removing %r for days=%s project=%r", FilterValue, days, project
             or '*')
    qs = FilterValue.objects.all()
    if project:
        qs = qs.filter(project=project)
    for obj in RangeQuerySetWrapper(qs):
        if not mqs.filter(key=obj.key, value=obj.value).exists():
            log.info("Removing unused filter %s=%s", obj.key, obj.value)
            qs.filter(key=obj.key, value=obj.value).delete()
            obj.delete()
Example #21
def cleanup(days=30,
            logger=None,
            site=None,
            server=None,
            level=None,
            project=None):
    """
    Deletes a portion of the trailing data in Sentry based on
    their creation dates. For example, if ``days`` is 30, this
    would attempt to clean up all data that's older than 30 days.

    :param logger: limit all deletion scopes to messages from the
                   specified logger.
    :param site: limit the message deletion scope to the specified
                 site.
    :param server: limit the message deletion scope to the specified
                   server.
    :param level: limit all deletion scopes to messages that are greater
                  than or equal to level.
    """
    import datetime

    from sentry.models import Group, Event, MessageCountByMinute, \
                              MessageFilterValue, FilterValue
    from sentry.utils import timezone
    from sentry.utils.query import RangeQuerySetWrapper, SkinnyQuerySet

    # TODO: we should collect which messages above were deleted
    # and potentially just send out post_delete signals where
    # GroupedMessage can update itself accordingly
    ts = timezone.now() - datetime.timedelta(days=days)

    # Message
    qs = SkinnyQuerySet(Event).filter(datetime__lte=ts)
    if logger:
        qs = qs.filter(logger=logger)
    if site:
        qs = qs.filter(site=site)
    if server:
        qs = qs.filter(server_name=server)
    if level:
        qs = qs.filter(level__gte=level)
    if project:
        qs = qs.filter(project=project)

    groups_to_check = set()
    for obj in RangeQuerySetWrapper(qs):
        print ">>> Removing <%s: id=%s>" % (obj.__class__.__name__, obj.pk)
        obj.delete()
        groups_to_check.add(obj.group_id)

    if not (server or site):
        # MessageCountByMinute
        qs = SkinnyQuerySet(MessageCountByMinute).filter(date__lte=ts)
        if logger:
            qs = qs.filter(group__logger=logger)
        if level:
            qs = qs.filter(group__level__gte=level)
        if project:
            qs = qs.filter(project=project)

        for obj in RangeQuerySetWrapper(qs):
            print ">>> Removing <%s: id=%s>" % (obj.__class__.__name__, obj.pk)
            obj.delete()

        # GroupedMessage
        qs = SkinnyQuerySet(Group).filter(last_seen__lte=ts)
        if logger:
            qs = qs.filter(logger=logger)
        if level:
            qs = qs.filter(level__gte=level)
        if project:
            qs = qs.filter(project=project)

        for obj in RangeQuerySetWrapper(qs):
            for key, value in SkinnyQuerySet(MessageFilterValue).filter(
                    group=obj).values_list('key', 'value'):
                if not MessageFilterValue.objects.filter(
                        key=key, value=value).exclude(group=obj).exists():
                    print ">>> Removing <FilterValue: key=%s, value=%s>" % (
                        key, value)
                    FilterValue.objects.filter(key=key, value=value).delete()
            print ">>> Removing <%s: id=%s>" % (obj.__class__.__name__, obj.pk)
            obj.delete()

    # attempt to cleanup any groups that may now be empty
    groups_to_delete = []
    for group_id in groups_to_check:
        if not Event.objects.filter(group=group_id).exists():
            groups_to_delete.append(group_id)

    if groups_to_delete:
        for obj in SkinnyQuerySet(Group).filter(pk__in=groups_to_delete):
            for key, value in SkinnyQuerySet(MessageFilterValue).filter(
                    group=obj).values_list('key', 'value'):
                if not MessageFilterValue.objects.filter(
                        key=key, value=value).exclude(group=obj).exists():
                    print ">>> Removing <FilterValue: key=%s, value=%s>" % (
                        key, value)
                    FilterValue.objects.filter(key=key, value=value).delete()
            print ">>> Removing <%s: id=%s>" % (obj.__class__.__name__, obj.pk)
            obj.delete()
Example #22
    def forwards(self, orm):
        from sentry.utils.query import (RangeQuerySetWrapper,
                                        RangeQuerySetWrapperWithProgressBar,
                                        WithProgressBar)

        Organization = orm['sentry.Organization']
        OrganizationMember = orm['sentry.OrganizationMember']
        PendingTeamMember = orm['sentry.PendingTeamMember']
        TeamMember = orm['sentry.TeamMember']
        Team = orm['sentry.Team']

        teams_by_org = defaultdict(list)

        for org in RangeQuerySetWrapper(Organization.objects.all()):
            for team in Team.objects.filter(organization=org):
                teams_by_org[org].append(team)

        for org, team_list in WithProgressBar(list(teams_by_org.items()),
                                              caption='Organizations'):
            team_member_qs = TeamMember.objects.filter(
                team__organization=org).select_related('team')

            members_by_user = defaultdict(list)
            for member in team_member_qs.iterator():
                if member.user_id == member.team.owner_id:
                    continue  # team owners are already present
                members_by_user[member.user_id].append(member)

            total_teams = len(team_list)

            for user_id, member_list in six.iteritems(members_by_user):
                # if they were a member of all teams, give them global access
                has_global_access = len(member_list) == total_teams

                # give them the highest level access they had
                access = min(m.type for m in member_list)

                sid = transaction.savepoint()
                try:
                    om = OrganizationMember.objects.create(
                        organization=org,
                        user_id=user_id,
                        type=access,
                        has_global_access=has_global_access,
                    )
                except IntegrityError:
                    transaction.savepoint_rollback(sid)
                    continue
                else:
                    transaction.savepoint_commit(sid)

                if not has_global_access:
                    for member in member_list:
                        om.teams.add(member.team)

            for pm in PendingTeamMember.objects.filter(team=team):
                om, _ = OrganizationMember.objects.get_or_create(
                    organization=org,
                    email=pm.email,
                    has_global_access=False,
                    defaults={'type': pm.type},
                )
                om.teams.add(team)
            transaction.commit()
Example #23
def cleanup(days=30,
            logger=None,
            site=None,
            server=None,
            level=None,
            project=None,
            resolved=None,
            **kwargs):
    """
    Deletes a portion of the trailing data in Sentry based on
    their creation dates. For example, if ``days`` is 30, this
    would attempt to clean up all data that's older than 30 days.

    :param logger: limit all deletion scopes to messages from the
                   specified logger.
    :param site: limit the message deletion scope to the specified
                 site.
    :param server: limit the message deletion scope to the specified
                   server.
    :param level: limit all deletion scopes to messages that are greater
                  than or equal to level.
    :param project: limit all deletion scopes to messages that are part
                    of the given project
    :param resolved: limit all deletion scopes to messages that are resolved.
    """
    import datetime

    from django.utils import timezone
    from sentry.models import Group, Event, MessageCountByMinute, \
      MessageFilterValue, FilterKey, FilterValue, SearchDocument, ProjectCountByMinute
    from sentry.utils.query import RangeQuerySetWrapper, SkinnyQuerySet

    log = cleanup.get_logger()

    def cleanup_groups(iterable):
        for obj in iterable:
            log.info("Removing all matching <SearchDocument: group=%s>",
                     obj.pk)
            SearchDocument.objects.filter(group=obj).delete()
            log.info("Removing <%s: id=%s>", obj.__class__.__name__, obj.pk)
            obj.delete()

    # TODO: we should collect which messages above were deleted
    # and potentially just send out post_delete signals where
    # GroupedMessage can update itself accordingly
    ts = timezone.now() - datetime.timedelta(days=days)

    # Message
    qs = SkinnyQuerySet(Event).filter(datetime__lte=ts)
    if logger:
        qs = qs.filter(logger=logger)
    if site:
        qs = qs.filter(site=site)
    if server:
        qs = qs.filter(server_name=server)
    if level:
        qs = qs.filter(level__gte=level)
    if project:
        qs = qs.filter(project=project)
    if resolved is True:
        qs = qs.filter(group__status=1)
    elif resolved is False:
        qs = qs.filter(group__status=0)

    groups_to_check = set()
    if resolved is None:
        for obj in RangeQuerySetWrapper(qs):
            log.info("Removing <%s: id=%s>", obj.__class__.__name__, obj.pk)
            obj.delete()
            groups_to_check.add(obj.group_id)

    if not (server or site):
        # MessageCountByMinute
        qs = SkinnyQuerySet(MessageCountByMinute).filter(date__lte=ts)
        if logger:
            qs = qs.filter(group__logger=logger)
        if level:
            qs = qs.filter(group__level__gte=level)
        if project:
            qs = qs.filter(project=project)
        if resolved is True:
            qs = qs.filter(group__status=1)
        elif resolved is False:
            qs = qs.filter(group__status=0)

        for obj in RangeQuerySetWrapper(qs):
            log.info("Removing <%s: id=%s>", obj.__class__.__name__, obj.pk)
            obj.delete()

        # Group
        qs = SkinnyQuerySet(Group).filter(last_seen__lte=ts)
        if logger:
            qs = qs.filter(logger=logger)
        if level:
            qs = qs.filter(level__gte=level)
        if project:
            qs = qs.filter(project=project)
        if resolved is True:
            qs = qs.filter(status=1)
        elif resolved is False:
            qs = qs.filter(status=0)

        cleanup_groups(RangeQuerySetWrapper(qs))

    # Project counts
    # TODO: these don't handle filters
    qs = SkinnyQuerySet(ProjectCountByMinute).filter(date__lte=ts)
    if project:
        qs = qs.filter(project=project)

    for obj in RangeQuerySetWrapper(qs):
        log.info("Removing <%s: id=%s>", obj.__class__.__name__, obj.pk)
        obj.delete()

    # Filters
    qs = FilterKey.objects.all()
    if project:
        qs = qs.filter(project=project)

    mqs = MessageFilterValue.objects.all()
    if project:
        mqs = mqs.filter(project=project)

    for obj in RangeQuerySetWrapper(qs):
        if not mqs.filter(key=obj.key).exists():
            log.info(
                "Removing filters for unused filter %s=*",
                obj.key,
            )
            qs.filter(key=obj.key).delete()
            obj.delete()

    qs = FilterValue.objects.all()
    if project:
        qs = qs.filter(project=project)

    for obj in RangeQuerySetWrapper(qs):
        if not mqs.filter(key=obj.key, value=obj.value).exists():
            log.info("Removing filters for unused filter %s=%s", obj.key,
                     obj.value)
            qs.filter(key=obj.key, value=obj.value).delete()
            obj.delete()

    # attempt to cleanup any groups that may now be empty
    groups_to_delete = []
    for group_id in groups_to_check:
        if not Event.objects.filter(group=group_id).exists():
            groups_to_delete.append(group_id)

    if groups_to_delete:
        cleanup_groups(SkinnyQuerySet(Group).filter(pk__in=groups_to_delete))
Example #24
def backfill_eventstream(apps, schema_editor):
    """
    Inserts Postgres events into the eventstream if there are recent events in Postgres.

    This is for open source users migrating from 9.x who want to keep their events.
    If there are no recent events in Postgres, skip the backfill.
    """
    from sentry import eventstore, eventstream
    from sentry.utils.query import RangeQuerySetWrapper

    Event = apps.get_model("sentry", "Event")
    Group = apps.get_model("sentry", "Group")
    Project = apps.get_model("sentry", "Project")

    # Kill switch to skip this migration
    skip_backfill = os.environ.get("SENTRY_SKIP_EVENTS_BACKFILL_FOR_10", False)

    # Use 90 day retention if the option has not been set or set to 0
    DEFAULT_RETENTION = 90
    retention_days = options.get(
        "system.event-retention-days") or DEFAULT_RETENTION

    def get_events(last_days):
        to_date = timezone.now()
        from_date = to_date - timedelta(days=last_days)
        return Event.objects.filter(datetime__gte=from_date,
                                    datetime__lte=to_date,
                                    group_id__isnull=False)

    def _attach_related(_events):
        project_ids = set()
        group_ids = set()
        for event in _events:
            project_ids.add(event.project_id)
            group_ids.add(event.group_id)
        projects = {
            p.id: p
            for p in Project.objects.filter(id__in=project_ids)
        }
        groups = {g.id: g for g in Group.objects.filter(id__in=group_ids)}

        for event in _events:
            event.project = projects.get(event.project_id)
            event.group = groups.get(event.group_id)
        eventstore.bind_nodes(_events, "data")

    if skip_backfill:
        print("Skipping backfill.\n")
        return

    events = get_events(retention_days)
    count = events.count()

    if count == 0:
        print("Nothing to do, skipping migration.\n")
        return

    print("Events to process: {}\n".format(count))

    processed = 0
    for e in RangeQuerySetWrapper(events,
                                  step=100,
                                  callbacks=(_attach_related, )):
        event = NewEvent(project_id=e.project_id,
                         event_id=e.event_id,
                         group_id=e.group_id,
                         data=e.data.data)
        primary_hash = event.get_primary_hash()
        if event.project is None or event.group is None or len(
                event.data) == 0:
            print(
                "Skipped {} as group, project or node data information is invalid.\n"
                .format(event))
            continue

        try:
            eventstream.insert(
                group=event.group,
                event=event,
                is_new=False,
                is_regression=False,
                is_new_group_environment=False,
                primary_hash=primary_hash,
                received_timestamp=event.data.get("received")
                or float(event.datetime.strftime("%s")),
                skip_consume=True,
            )
            processed += 1
        except Exception as error:
            print(
                "An error occured while trying to instert the following event: {}\n.----\n{}"
                .format(event, error))

    if processed == 0:
        raise Exception(
            "Cannot migrate any event. If this is okay, re-run migrations with SENTRY_SKIP_EVENTS_BACKFILL_FOR_10 environment variable set to skip this step."
        )

    print("Event migration done. Migrated {} of {} events.\n".format(
        processed, count))
Example #25
    def handle(self,
               from_ts=None,
               to_ts=None,
               last_days=None,
               from_id=None,
               to_id=None,
               no_input=False,
               **options):
        def _attach_related(_events):
            project_ids = set([event.project_id for event in _events])
            projects = {
                p.id: p
                for p in Project.objects.filter(id__in=project_ids)
            }
            group_ids = set([event.group_id for event in _events])
            groups = {g.id: g for g in Group.objects.filter(id__in=group_ids)}
            for event in _events:
                event.project = projects[event.project_id]
                event.group = groups[event.group_id]
            eventstore.bind_nodes(_events, "data")

        from sentry import eventstream
        from sentry.utils.query import RangeQuerySetWrapper

        filter_methods = bool(last_days) + bool(from_ts or to_ts) + bool(
            from_id or to_id)
        if filter_methods > 1:
            raise CommandError(
                "You can either limit by primary key, or by timestamp, or last X days."
            )
        elif from_ts and to_ts:
            events = self.get_events_by_timestamp(from_ts, to_ts)
        elif last_days:
            events = self.get_events_by_last_days(last_days)
        elif from_id and to_id:
            events = self.get_events_by_id(from_id, to_id)
        else:
            raise CommandError(
                "Invalid arguments: either use --from/--to-id, or --from/--to-ts, or --last-days."
            )

        count = events.count()
        self.stdout.write("Events to process: {}\n".format(count))

        if count == 0:
            self.stdout.write("Nothing to do.\n")
            sys.exit(0)

        if not no_input:
            proceed = six.moves.input("Do you want to continue? [y/N] ")
            if proceed.strip().lower() not in ["yes", "y"]:
                raise CommandError("Aborted.")

        for event in RangeQuerySetWrapper(events,
                                          step=100,
                                          callbacks=(_attach_related, )):
            primary_hash = event.get_primary_hash()
            eventstream.insert(
                group=event.group,
                event=event,
                is_new=False,
                is_regression=False,
                is_new_group_environment=False,
                primary_hash=primary_hash,
                skip_consume=True,
            )

        self.stdout.write("Done.\n")
Example #26
    def forwards(self, orm):
        from django.db.models import F
        from collections import defaultdict
        from sentry.utils.models import create_or_update
        from sentry.utils.query import RangeQuerySetWrapper

        # We don't fully merge results because it's simply not worth it
        for group in RangeQuerySetWrapper(orm['sentry.Group'].objects.all()):

            # could be already migrated
            if not orm['sentry.Group'].objects.filter(id=group.id).exists():
                continue

            matches = list(orm['sentry.Group'].objects.exclude(
                id=group.id).filter(checksum=group.checksum,
                                    project=group.project))

            if not matches:
                continue

            print "Merging duplicate events for %r" % (group, )

            updates = defaultdict(int)
            updates.update({
                'first_seen': group.first_seen,
                'last_seen': group.last_seen,
                'active_at': group.active_at,
            })

            tag_updates = defaultdict(lambda: defaultdict(int))
            counts = defaultdict(lambda: defaultdict(int))
            for other in matches:
                # migrate events first
                orm['sentry.Event'].objects.filter(group=other).update(
                    group=group)

                updates['times_seen'] += other.times_seen
                updates['users_seen'] += other.users_seen
                updates['time_spent_total'] += other.time_spent_total
                updates['time_spent_count'] += other.time_spent_count
                for datecol in ('active_at', 'last_seen', 'first_seen'):
                    val = getattr(other, datecol)
                    if val and updates[datecol]:
                        updates[datecol] = max(val, updates[datecol])
                    elif val:
                        updates[datecol] = val

                # determine missing tags
                for tag in RangeQuerySetWrapper(
                        orm['sentry.MessageFilterValue'].objects.filter(
                            group=other)):
                    key = tag_updates[(tag.key, tag.value)]
                    key['times_seen'] += tag.times_seen
                    for datecol in ('last_seen', 'first_seen'):
                        val = getattr(tag, datecol)
                        if val and key[datecol]:
                            key[datecol] = max(val, key[datecol])
                        elif val:
                            key[datecol] = val

                # determine counts
                for count in RangeQuerySetWrapper(
                        orm['sentry.MessageCountByMinute'].objects.filter(
                            group=other)):
                    key = counts[count.date]
                    key['times_seen'] += count.times_seen
                    key['time_spent_total'] += count.time_spent_total
                    key['time_spent_count'] += count.time_spent_count

            # migrate tags
            for (key, value), data in tag_updates.iteritems():
                defaults = {
                    'times_seen': F('times_seen') + data['times_seen'],
                }
                if 'last_seen' in data:
                    defaults['last_seen'] = data['last_seen']
                if 'first_seen' in data:
                    defaults['first_seen'] = data['first_seen']

                create_or_update(orm['sentry.MessageFilterValue'],
                                 project=group.project,
                                 group=group,
                                 key=key,
                                 value=value,
                                 defaults=defaults)

            orm['sentry.MessageFilterValue'].objects.filter(
                group__in=matches).delete()

            # migrate counts
            for date, data in counts.iteritems():
                create_or_update(
                    orm['sentry.MessageCountByMinute'],
                    project=group.project,
                    group=group,
                    date=date,
                    defaults={
                        'times_seen':
                        F('times_seen') + data['times_seen'],
                        'time_spent_total':
                        F('time_spent_total') + data['time_spent_total'],
                        'time_spent_count':
                        F('time_spent_count') + data['time_spent_count'],
                    })

            orm['sentry.MessageCountByMinute'].objects.filter(
                group__in=matches).delete()

            orm['sentry.Group'].objects.filter(id=group.id).update(
                times_seen=F('times_seen') + updates['times_seen'],
                users_seen=F('users_seen') + updates['users_seen'],
                time_spent_total=F('time_spent_total') +
                updates['time_spent_total'],
                time_spent_count=F('time_spent_count') +
                updates['time_spent_count'],
                last_seen=updates['last_seen'],
                first_seen=updates['first_seen'],
                active_at=updates['active_at'],
            )

            for other in matches:
                other.delete()
Example #27
def backfill_eventstream(apps, schema_editor):
    """
    Inserts Postgres events into the eventstream if there are recent events in Postgres.

    This is for open source users migrating from 9.x who want to keep their events.
    If there are no recent events in Postgres, skip the backfill.
    """
    from sentry import eventstore, eventstream
    from sentry.utils.query import RangeQuerySetWrapper

    Event = apps.get_model("sentry", "Event")
    Group = apps.get_model("sentry", "Group")
    Project = apps.get_model("sentry", "Project")

    # Kill switch to skip this migration
    skip_backfill = os.environ.get("SENTRY_SKIP_EVENTS_BACKFILL_FOR_10", False)

    # Use 90 day retention if the option has not been set or set to 0
    DEFAULT_RETENTION = 90
    retention_days = options.get(
        "system.event-retention-days") or DEFAULT_RETENTION

    def get_events(last_days):
        to_date = datetime.now()
        from_date = to_date - timedelta(days=last_days)
        return Event.objects.filter(datetime__gte=from_date,
                                    datetime__lte=to_date,
                                    group_id__isnull=False)

    def _attach_related(_events):
        project_ids = set()
        group_ids = set()
        for event in _events:
            project_ids.add(event.project_id)
            group_ids.add(event.group_id)
        projects = {
            p.id: p
            for p in Project.objects.filter(id__in=project_ids)
        }
        groups = {g.id: g for g in Group.objects.filter(id__in=group_ids)}

        for event in _events:
            event.project = projects.get(event.project_id)
            event.group = groups.get(event.group_id)
        eventstore.bind_nodes(_events, "data")

    if skip_backfill:
        print("Skipping backfill.\n")
        return

    events = get_events(retention_days)
    count = events.count()

    if count == 0:
        print("Nothing to do, skipping migration.\n")
        return

    print("Events to process: {}\n".format(count))

    processed = 0
    for event in RangeQuerySetWrapper(events,
                                      step=100,
                                      callbacks=(_attach_related, )):
        primary_hash = event.get_primary_hash()
        if event.project is None or event.group is None:
            print("Skipped {} as group or project information is invalid.\n".
                  format(event))
            continue

        eventstream.insert(
            group=event.group,
            event=event,
            is_new=False,
            is_regression=False,
            is_new_group_environment=False,
            primary_hash=primary_hash,
            skip_consume=True,
        )
        processed += 1

    print("Event migration done. Processed {} of {} events.\n".format(
        processed, count))
Example #28
def backfill_eventstream(apps, schema_editor):
    """
    Inserts Postgres events into the eventstream if there are recent events in Postgres.

    This is for open source users migrating from 9.x who want to keep their events.
    If there are no recent events in Postgres, skip the backfill.
    """
    from sentry import eventstore, eventstream
    from sentry.utils.query import RangeQuerySetWrapper

    Event = apps.get_model("sentry", "Event")
    Group = apps.get_model("sentry", "Group")
    Project = apps.get_model("sentry", "Project")

    # Kill switch to skip this migration
    skip_backfill = os.environ.get("SENTRY_SKIP_EVENTS_BACKFILL_FOR_10", False)

    # Use 90 day retention if the option has not been set or set to 0
    DEFAULT_RETENTION = 90
    retention_days = options.get(
        "system.event-retention-days") or DEFAULT_RETENTION

    def get_events(last_days):
        to_date = timezone.now()
        from_date = to_date - timedelta(days=last_days)
        return Event.objects.filter(datetime__gte=from_date,
                                    datetime__lte=to_date,
                                    group_id__isnull=False)

    def _attach_related(_events):
        project_ids = set()
        group_ids = set()
        for event in _events:
            project_ids.add(event.project_id)
            group_ids.add(event.group_id)
        projects = {
            p.id: p
            for p in Project.objects.filter(id__in=project_ids)
        }
        groups = {g.id: g for g in Group.objects.filter(id__in=group_ids)}

        for event in _events:
            event.project = projects.get(event.project_id)
            event.group = groups.get(event.group_id)
            # When migrating old data from Sentry 9.0.0 to 9.1.2 to 10 in rapid succession, the event timestamp may be
            # missing. This adds it back
            if "timestamp" not in event.data.data:
                event.data.data["timestamp"] = to_timestamp(event.datetime)
        eventstore.bind_nodes(_events, "data")

    if skip_backfill:
        print("Skipping backfill.\n")  # noqa: B314
        return

    events = get_events(retention_days)
    count = events.count()

    if count == 0:
        print("Nothing to do, skipping migration.\n")  # noqa: B314
        return

    print("Events to process: {}\n".format(count))  # noqa: B314

    processed = 0
    for e in RangeQuerySetWrapper(events,
                                  step=100,
                                  callbacks=(_attach_related, )):
        event = NewEvent(project_id=e.project_id,
                         event_id=e.event_id,
                         group_id=e.group_id,
                         data=e.data.data)

        try:
            group = event.group
        except Group.DoesNotExist:
            group = None

        if event.project is None or group is None or len(event.data) == 0:
            print(  # noqa: B314
                "Skipped {} as group, project or node data information is invalid.\n"
                .format(event))
            continue

        try:
            eventstream.insert(
                group=event.group,
                event=event,
                is_new=False,
                is_regression=False,
                is_new_group_environment=False,
                primary_hash=event.get_primary_hash(),
                received_timestamp=event.data.get("received")
                or float(event.datetime.strftime("%s")),
                skip_consume=True,
            )

            # The node ID format was changed in Sentry 9.1.0
            # (https://github.com/getsentry/sentry/commit/f73a4039d16a5c4f88bde37f6464cac21deb50e1)
            # If we are migrating from older versions of Sentry (i.e. 9.0.0 and earlier)
            # we need to resave the node using the new node ID scheme and delete the old
            # node.
            old_node_id = e.data.id
            new_node_id = event.data.id
            if old_node_id != new_node_id:
                event.data.save()
                nodestore.delete(old_node_id)

            processed += 1
        except Exception as error:
            print(  # noqa: B314
                "An error occured while trying to migrate the following event: {}\n.----\n{}"
                .format(event, error))

    if processed == 0:
        raise Exception(
            "Cannot migrate any event. If this is okay, re-run migrations with SENTRY_SKIP_EVENTS_BACKFILL_FOR_10 environment variable set to skip this step."
        )

    print(  # noqa: B314
        "Event migration done. Migrated {} of {} events.\n".format(
            processed, count))