def test_get_fingerprint(self):
    assert get_fingerprint(
        self.store_event(data={"message": "Hello world"}, project_id=self.project.id)
    ) == hashlib.md5(b"Hello world").hexdigest()

    assert get_fingerprint(
        self.store_event(
            data={"message": "Hello world", "fingerprint": ["Not hello world"]},
            project_id=self.project.id,
        )
    ) == hashlib.md5(b"Not hello world").hexdigest()
def test_get_fingerprint():
    assert get_fingerprint(
        Event(data={"logentry": {"message": "Hello world"}})
    ) == hashlib.md5("Hello world").hexdigest()

    assert get_fingerprint(
        Event(
            data={
                "fingerprint": ["Not hello world"],
                "logentry": {"message": "Hello world"},
            }
        )
    ) == hashlib.md5("Not hello world").hexdigest()
def test_get_fingerprint():
    assert get_fingerprint(
        Event(data={
            'logentry': {
                'message': 'Hello world',
            },
        })
    ) == hashlib.md5('Hello world').hexdigest()

    assert get_fingerprint(
        Event(data={
            'fingerprint': ['Not hello world'],
            'logentry': {
                'message': 'Hello world',
            },
        })
    ) == hashlib.md5('Not hello world').hexdigest()
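# Taken together, the variants above pin down one contract: an explicit
# "fingerprint" on the event wins, otherwise the hash falls back to the
# message. A minimal sketch of that contract (get_fingerprint_sketch is a
# hypothetical stand-in; Sentry's real helper runs the full grouping
# pipeline, so this only models the two cases the tests assert):

import hashlib


def get_fingerprint_sketch(event_data):
    fingerprint = event_data.get('fingerprint')
    # An explicit fingerprint entry takes precedence over the log message.
    value = fingerprint[0] if fingerprint else event_data['logentry']['message']
    return hashlib.md5(value.encode('utf-8')).hexdigest()


assert get_fingerprint_sketch(
    {'logentry': {'message': 'Hello world'}}
) == hashlib.md5(b'Hello world').hexdigest()
assert get_fingerprint_sketch(
    {'fingerprint': ['Not hello world'], 'logentry': {'message': 'Hello world'}}
) == hashlib.md5(b'Not hello world').hexdigest()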
def test_unmerge(self):
    now = datetime(2017, 5, 3, 6, 6, 6, tzinfo=pytz.utc)

    def shift(i):
        return timedelta(seconds=1 << i)

    project = self.create_project()
    source = self.create_group(project)

    sequence = itertools.count(0)
    tag_values = itertools.cycle(['red', 'green', 'blue'])
    user_values = itertools.cycle([
        {'id': 1},
        {'id': 2},
    ])

    EnvironmentProject.objects.create(
        environment=Environment.objects.create(
            organization_id=project.organization_id,
            name='production',
        ),
        project=project,
    )

    def create_message_event(template, parameters):
        i = next(sequence)

        event_id = uuid.UUID(
            fields=(i, 0x0, 0x1000, 0x80, 0x80, 0x808080808080),
        ).hex

        event = Event.objects.create(
            project_id=project.id,
            group_id=source.id,
            event_id=event_id,
            message='%s' % (id, ),
            datetime=now + shift(i),
            data={
                'environment': 'production',
                'type': 'default',
                'metadata': {
                    'title': template % parameters,
                },
                'sentry.interfaces.Message': {
                    'message': template,
                    'params': parameters,
                    'formatted': template % parameters,
                },
                'sentry.interfaces.User': next(user_values),
                'tags': [
                    ['color', next(tag_values)],
                    ['environment', 'production'],
                    ['sentry:release', 'version'],
                ],
            },
        )

        with self.tasks():
            Group.objects.add_tags(
                source,
                tags=event.get_tags(),
            )

        EventMapping.objects.create(
            project_id=project.id,
            group_id=source.id,
            event_id=event_id,
            date_added=event.datetime,
        )

        UserReport.objects.create(
            project_id=project.id,
            group_id=source.id,
            event_id=event_id,
            name='Log Hat',
            email='*****@*****.**',
            comments='Quack',
        )

        features.record(event)

        return event

    events = OrderedDict()

    for event in (create_message_event('This is message #%s.', i) for i in xrange(10)):
        events.setdefault(get_fingerprint(event), []).append(event)

    for event in (create_message_event('This is message #%s!', i) for i in xrange(10, 17)):
        events.setdefault(get_fingerprint(event), []).append(event)

    assert len(events) == 2
    assert sum(map(len, events.values())) == 17

    # XXX: This is super contrived considering that it doesn't actually go
    # through the event pipeline, but them's the breaks, eh?
    for fingerprint in events.keys():
        GroupHash.objects.create(
            project=project,
            group=source,
            hash=fingerprint,
        )

    assert set(
        GroupTagKey.objects.filter(group=source).values_list('key', 'values_seen')
    ) == set([
        (u'color', 3),
        (u'environment', 1),
        (u'sentry:release', 1),
    ])

    assert set(
        GroupTagValue.objects.filter(
            group_id=source.id,
        ).values_list('key', 'value', 'times_seen')
    ) == set([
        (u'color', u'red', 6),
        (u'color', u'green', 6),
        (u'color', u'blue', 5),
        (u'environment', u'production', 17),
        (u'sentry:release', u'version', 17),
    ])

    assert features.compare(source) == [
        (source.id, {'message:message:character-shingles': 1.0}),
    ]

    with self.tasks():
        unmerge.delay(
            source.project_id,
            source.id,
            None,
            [events.keys()[1]],
            None,
            batch_size=5,
        )

    assert list(
        Group.objects.filter(id=source.id).values_list(
            'times_seen',
            'first_seen',
            'last_seen',
        )
    ) == [(10, now + shift(0), now + shift(9))]

    source_activity = Activity.objects.get(
        group_id=source.id,
        type=Activity.UNMERGE_SOURCE,
    )

    destination = Group.objects.get(
        id=source_activity.data['destination_id'],
    )

    assert list(
        Group.objects.filter(id=destination.id).values_list(
            'times_seen',
            'first_seen',
            'last_seen',
        )
    ) == [(7, now + shift(10), now + shift(16))]

    assert source_activity.data == {
        'destination_id': destination.id,
        'fingerprints': [events.keys()[1]],
    }

    assert source.id != destination.id
    assert source.project == destination.project

    assert Activity.objects.get(
        group_id=destination.id,
        type=Activity.UNMERGE_DESTINATION,
    ).data == {
        'source_id': source.id,
        'fingerprints': [events.keys()[1]],
    }

    source_event_event_ids = map(
        lambda event: event.event_id,
        events.values()[0],
    )

    assert source.event_set.count() == 10

    assert set(
        EventMapping.objects.filter(
            group_id=source.id,
        ).values_list('event_id', flat=True)
    ) == set(source_event_event_ids)

    assert set(
        UserReport.objects.filter(
            group_id=source.id,
        ).values_list('event_id', flat=True)
    ) == set(source_event_event_ids)

    assert set(
        GroupHash.objects.filter(
            group_id=source.id,
        ).values_list('hash', flat=True)
    ) == set([events.keys()[0]])

    assert set(
        GroupRelease.objects.filter(
            group_id=source.id,
        ).values_list('environment', 'first_seen', 'last_seen')
    ) == set([
        (u'production', now + shift(0), now + shift(9)),
    ])

    assert set(
        GroupTagKey.objects.filter(group=source).values_list('key', 'values_seen')
    ) == set([
        (u'color', 3),
        (u'environment', 1),
        (u'sentry:release', 1),
    ])

    assert set(
        GroupTagValue.objects.filter(
            group_id=source.id,
        ).values_list('key', 'value', 'times_seen', 'first_seen', 'last_seen')
    ) == set([
        (u'color', u'red', 4, now + shift(0), now + shift(9)),
        (u'color', u'green', 3, now + shift(1), now + shift(7)),
        (u'color', u'blue', 3, now + shift(2), now + shift(8)),
        (u'environment', u'production', 10, now + shift(0), now + shift(9)),
        (u'sentry:release', u'version', 10, now + shift(0), now + shift(9)),
    ])

    destination_event_event_ids = map(
        lambda event: event.event_id,
        events.values()[1],
    )

    assert destination.event_set.count() == 7

    assert set(
        EventMapping.objects.filter(
            group_id=destination.id,
        ).values_list('event_id', flat=True)
    ) == set(destination_event_event_ids)

    assert set(
        UserReport.objects.filter(
            group_id=destination.id,
        ).values_list('event_id', flat=True)
    ) == set(destination_event_event_ids)

    assert set(
        GroupHash.objects.filter(
            group_id=destination.id,
        ).values_list('hash', flat=True)
    ) == set([events.keys()[1]])

    assert set(
        GroupRelease.objects.filter(
            group_id=destination.id,
        ).values_list('environment', 'first_seen', 'last_seen')
    ) == set([
        (u'production', now + shift(10), now + shift(16)),
    ])

    assert set(
        GroupTagKey.objects.filter(group=destination).values_list('key', 'values_seen')
    ) == set([
        (u'color', 3),
        (u'environment', 1),
        (u'sentry:release', 1),
    ])

    assert set(
        GroupTagValue.objects.filter(
            group_id=destination.id,
        ).values_list('key', 'value', 'times_seen', 'first_seen', 'last_seen')
    ) == set([
        (u'color', u'red', 2, now + shift(12), now + shift(15)),
        (u'color', u'green', 3, now + shift(10), now + shift(16)),
        (u'color', u'blue', 2, now + shift(11), now + shift(14)),
        (u'environment', u'production', 7, now + shift(10), now + shift(16)),
        (u'sentry:release', u'version', 7, now + shift(10), now + shift(16)),
    ])

    time_series = tsdb.get_range(
        tsdb.models.group,
        [source.id, destination.id],
        now,
        now + shift(16),
    )

    def get_expected_series_values(rollup, events, function=None):
        if function is None:
            def function(aggregate, event):
                return (aggregate if aggregate is not None else 0) + 1

        expected = {}
        for event in events:
            k = float((to_timestamp(event.datetime) // rollup_duration) * rollup_duration)
            expected[k] = function(expected.get(k), event)

        return expected

    def assert_series_contains(expected, actual, default=0):
        actual = dict(actual)

        for key, value in expected.items():
            assert actual[key] == value

        for key in set(actual.keys()) - set(expected.keys()):
            assert actual[key] == default

    rollup_duration = time_series.values()[0][1][0] - time_series.values()[0][0][0]

    assert_series_contains(
        get_expected_series_values(rollup_duration, events.values()[0]),
        time_series[source.id],
        0,
    )

    assert_series_contains(
        get_expected_series_values(rollup_duration, events.values()[1]),
        time_series[destination.id],
        0,
    )

    time_series = tsdb.get_distinct_counts_series(
        tsdb.models.users_affected_by_group,
        [source.id, destination.id],
        now,
        now + shift(16),
    )

    rollup_duration = time_series.values()[0][1][0] - time_series.values()[0][0][0]

    def collect_by_user_tag(aggregate, event):
        aggregate = aggregate if aggregate is not None else set()
        aggregate.add(
            get_event_user_from_interface(
                event.data['sentry.interfaces.User'],
            ).tag_value,
        )
        return aggregate

    assert_series_contains(
        {
            timestamp: len(values)
            for timestamp, values in get_expected_series_values(
                rollup_duration,
                events.values()[0],
                collect_by_user_tag,
            ).items()
        },
        time_series[source.id],
    )

    assert_series_contains(
        {
            timestamp: len(values)
            for timestamp, values in get_expected_series_values(
                rollup_duration,
                events.values()[1],
                collect_by_user_tag,
            ).items()
        },
        time_series[destination.id],
    )

    time_series = tsdb.get_most_frequent_series(
        tsdb.models.frequent_releases_by_group,
        [source.id, destination.id],
        now,
        now + shift(16),
    )

    rollup_duration = time_series.values()[0][1][0] - time_series.values()[0][0][0]

    def collect_by_release(group, aggregate, event):
        aggregate = aggregate if aggregate is not None else {}
        release = GroupRelease.objects.get(
            group_id=group.id,
            environment=event.data['environment'],
            release_id=Release.objects.get(
                organization_id=project.organization_id,
                version=event.get_tag('sentry:release'),
            ).id,
        ).id
        aggregate[release] = aggregate.get(release, 0) + 1
        return aggregate

    assert_series_contains(
        get_expected_series_values(
            rollup_duration,
            events.values()[0],
            functools.partial(collect_by_release, source),
        ),
        time_series[source.id],
        {},
    )

    assert_series_contains(
        get_expected_series_values(
            rollup_duration,
            events.values()[1],
            functools.partial(collect_by_release, destination),
        ),
        time_series[destination.id],
        {},
    )

    time_series = tsdb.get_most_frequent_series(
        tsdb.models.frequent_environments_by_group,
        [source.id, destination.id],
        now,
        now + shift(16),
    )

    rollup_duration = time_series.values()[0][1][0] - time_series.values()[0][0][0]

    def collect_by_environment(aggregate, event):
        aggregate = aggregate if aggregate is not None else {}
        environment = Environment.objects.get(
            organization_id=project.organization_id,
            name=event.data['environment'],
        ).id
        aggregate[environment] = aggregate.get(environment, 0) + 1
        return aggregate

    assert_series_contains(
        get_expected_series_values(
            rollup_duration,
            events.values()[0],
            collect_by_environment,
        ),
        time_series[source.id],
        {},
    )

    assert_series_contains(
        get_expected_series_values(
            rollup_duration,
            events.values()[1],
            collect_by_environment,
        ),
        time_series[destination.id],
        {},
    )

    source_similar_items = features.compare(source)
    assert source_similar_items[0] == (source.id, {'message:message:character-shingles': 1.0})
    assert source_similar_items[1][0] == destination.id
    assert source_similar_items[1][1].keys() == ['message:message:character-shingles']
    assert source_similar_items[1][1]['message:message:character-shingles'] < 1.0

    destination_similar_items = features.compare(destination)
    assert destination_similar_items[0] == (
        destination.id,
        {'message:message:character-shingles': 1.0},
    )
    assert destination_similar_items[1][0] == source.id
    assert destination_similar_items[1][1].keys() == ['message:message:character-shingles']
    assert destination_similar_items[1][1]['message:message:character-shingles'] < 1.0
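# The series assertions above all reduce to one piece of arithmetic: an event
# timestamp is floored to the start of its rollup window via
# (to_timestamp(dt) // rollup) * rollup, and the exponential shift(i) spacing
# pushes later events across window boundaries. A self-contained sketch of
# that bucketing (names here are illustrative, not Sentry's):

import calendar
from datetime import datetime, timedelta


def bucket(dt, rollup):
    # Floor a datetime to its rollup window, mirroring the arithmetic in the
    # test's get_expected_series_values helper.
    return float((calendar.timegm(dt.utctimetuple()) // rollup) * rollup)


start = datetime(2017, 5, 3, 6, 6, 6)
# Events one second apart share an hourly bucket...
assert bucket(start, 3600) == bucket(start + timedelta(seconds=1), 3600)
# ...while shift(12) = 4096 seconds already lands in a different window.
assert bucket(start + timedelta(seconds=1 << 12), 3600) != bucket(start, 3600)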
def test_unmerge(self):
    now = before_now(minutes=5).replace(microsecond=0, tzinfo=pytz.utc)

    def time_from_now(offset=0):
        return now + timedelta(seconds=offset)

    project = self.create_project()

    sequence = itertools.count(0)
    tag_values = itertools.cycle(["red", "green", "blue"])
    user_values = itertools.cycle([{"id": 1}, {"id": 2}])

    def create_message_event(template, parameters, environment, release, fingerprint="group1"):
        i = next(sequence)

        event_id = uuid.UUID(fields=(i, 0x0, 0x1000, 0x80, 0x80, 0x808080808080)).hex

        tags = [["color", next(tag_values)]]

        if release:
            tags.append(["sentry:release", release])

        event = self.store_event(
            data={
                "event_id": event_id,
                "message": template % parameters,
                "type": "default",
                "user": next(user_values),
                "tags": tags,
                "fingerprint": [fingerprint],
                "timestamp": iso_format(now + timedelta(seconds=i)),
                "environment": environment,
                "release": release,
            },
            project_id=project.id,
        )

        UserReport.objects.create(
            project_id=project.id,
            group_id=event.group.id,
            event_id=event_id,
            name="Log Hat",
            email="*****@*****.**",
            comments="Quack",
        )

        features.record([event])

        return event

    events = OrderedDict()

    for event in (
        create_message_event(
            "This is message #%s.", i, environment="production", release="version"
        )
        for i in xrange(10)
    ):
        events.setdefault(get_fingerprint(event), []).append(event)

    for event in (
        create_message_event(
            "This is message #%s!",
            i,
            environment="production",
            release="version2",
            fingerprint="group2",
        )
        for i in xrange(10, 16)
    ):
        events.setdefault(get_fingerprint(event), []).append(event)

    event = create_message_event(
        "This is message #%s!",
        17,
        environment="staging",
        release="version3",
        fingerprint="group3",
    )
    events.setdefault(get_fingerprint(event), []).append(event)

    merge_source, source, destination = list(Group.objects.all())

    assert len(events) == 3
    assert sum(map(len, events.values())) == 17

    production_environment = Environment.objects.get(
        organization_id=project.organization_id, name="production"
    )

    with self.tasks():
        eventstream_state = eventstream.start_merge(project.id, [merge_source.id], source.id)
        merge_groups.delay([merge_source.id], source.id)
        eventstream.end_merge(eventstream_state)

    assert set(
        [
            (gtv.value, gtv.times_seen)
            for gtv in tagstore.get_group_tag_values(
                project.id, source.id, production_environment.id, "color"
            )
        ]
    ) == set([("red", 6), ("green", 5), ("blue", 5)])

    similar_items = features.compare(source)
    assert len(similar_items) == 2
    assert similar_items[0][0] == source.id
    assert similar_items[0][1]["message:message:character-shingles"] == 1.0
    assert similar_items[1][0] == destination.id
    assert similar_items[1][1]["message:message:character-shingles"] < 1.0

    with self.tasks():
        eventstream_state = eventstream.start_unmerge(
            project.id, [list(events.keys())[0]], source.id, destination.id
        )
        unmerge.delay(
            project.id, source.id, destination.id, [list(events.keys())[0]], None, batch_size=5
        )
        eventstream.end_unmerge(eventstream_state)

    assert (
        list(
            Group.objects.filter(id=merge_source.id).values_list(
                "times_seen", "first_seen", "last_seen"
            )
        )
        == []
    )

    assert list(
        Group.objects.filter(id=source.id).values_list("times_seen", "first_seen", "last_seen")
    ) == [(6, time_from_now(10), time_from_now(15))]

    assert list(
        Group.objects.filter(id=destination.id).values_list(
            "times_seen", "first_seen", "last_seen"
        )
    ) == [(11, time_from_now(0), time_from_now(16))]

    assert source.id != destination.id
    assert source.project == destination.project

    # After the unmerge, the source group retains only the "group2" events.
    source_event_ids = map(lambda event: event.event_id, list(events.values())[1])

    assert set(
        UserReport.objects.filter(group_id=source.id).values_list("event_id", flat=True)
    ) == set(source_event_ids)

    assert set(
        GroupHash.objects.filter(group_id=source.id).values_list("hash", flat=True)
    ) == set(itertools.islice(events.keys(), 2))

    assert set(
        GroupRelease.objects.filter(group_id=source.id).values_list(
            "environment", "first_seen", "last_seen"
        )
    ) == set([(u"production", time_from_now(10), time_from_now(15))])

    assert set(
        [
            (gtv.value, gtv.times_seen)
            for gtv in tagstore.get_group_tag_values(
                project.id, destination.id, production_environment.id, "color"
            )
        ]
    ) == set([(u"red", 4), (u"green", 3), (u"blue", 3)])

    destination_event_ids = map(
        lambda event: event.event_id, list(events.values())[0] + list(events.values())[2]
    )

    assert set(
        UserReport.objects.filter(group_id=destination.id).values_list("event_id", flat=True)
    ) == set(destination_event_ids)

    assert set(
        GroupHash.objects.filter(group_id=destination.id).values_list("hash", flat=True)
    ) == set(itertools.islice(events.keys(), 2, 3))

    assert set(
        GroupRelease.objects.filter(group_id=destination.id).values_list(
            "environment", "first_seen", "last_seen"
        )
    ) == set(
        [
            ("production", time_from_now(0), time_from_now(9)),
            ("staging", time_from_now(16), time_from_now(16)),
        ]
    )

    assert set(
        [
            (gtk.value, gtk.times_seen)
            for gtk in tagstore.get_group_tag_values(
                project.id, destination.id, production_environment.id, "color"
            )
        ]
    ) == set([("red", 4), ("blue", 3), ("green", 3)])

    rollup_duration = 3600

    time_series = tsdb.get_range(
        tsdb.models.group,
        [source.id, destination.id],
        now - timedelta(seconds=rollup_duration),
        time_from_now(17),
        rollup_duration,
    )

    environment_time_series = tsdb.get_range(
        tsdb.models.group,
        [source.id, destination.id],
        now - timedelta(seconds=rollup_duration),
        time_from_now(17),
        rollup_duration,
        environment_ids=[production_environment.id],
    )

    def get_expected_series_values(rollup, events, function=None):
        if function is None:
            def function(aggregate, event):
                return (aggregate if aggregate is not None else 0) + 1

        expected = {}
        for event in events:
            k = float((to_timestamp(event.datetime) // rollup_duration) * rollup_duration)
            expected[k] = function(expected.get(k), event)

        return expected

    def assert_series_contains(expected, actual, default=0):
        actual = dict(actual)

        for key, value in expected.items():
            assert actual.get(key, 0) == value

        for key in set(actual.keys()) - set(expected.keys()):
            assert actual.get(key, 0) == default

    assert_series_contains(
        get_expected_series_values(rollup_duration, list(events.values())[1]),
        time_series[source.id],
        0,
    )

    assert_series_contains(
        get_expected_series_values(
            rollup_duration, list(events.values())[0] + list(events.values())[2]
        ),
        time_series[destination.id],
        0,
    )

    assert_series_contains(
        get_expected_series_values(rollup_duration, list(events.values())[1]),
        environment_time_series[source.id],
        0,
    )

    assert_series_contains(
        get_expected_series_values(
            rollup_duration, list(events.values())[0][:-1] + list(events.values())[2]
        ),
        environment_time_series[destination.id],
        0,
    )

    time_series = tsdb.get_distinct_counts_series(
        tsdb.models.users_affected_by_group,
        [source.id, destination.id],
        now - timedelta(seconds=rollup_duration),
        time_from_now(17),
        rollup_duration,
    )

    environment_time_series = tsdb.get_distinct_counts_series(
        tsdb.models.users_affected_by_group,
        [source.id, destination.id],
        now - timedelta(seconds=rollup_duration),
        time_from_now(17),
        rollup_duration,
        environment_id=production_environment.id,
    )

    def collect_by_user_tag(aggregate, event):
        aggregate = aggregate if aggregate is not None else set()
        aggregate.add(get_event_user_from_interface(event.data["user"]).tag_value)
        return aggregate

    for series in [time_series, environment_time_series]:
        assert_series_contains(
            {
                timestamp: len(values)
                for timestamp, values in get_expected_series_values(
                    rollup_duration, list(events.values())[1], collect_by_user_tag
                ).items()
            },
            series[source.id],
        )

        assert_series_contains(
            {
                timestamp: len(values)
                for timestamp, values in get_expected_series_values(
                    rollup_duration,
                    list(events.values())[0] + list(events.values())[2],
                    collect_by_user_tag,
                ).items()
            },
            # Note: the destination check uses the unfiltered series on both
            # passes; the environment-filtered series would exclude the
            # staging event and not match this expectation.
            time_series[destination.id],
        )

    def strip_zeroes(data):
        for group_id, series in data.items():
            for _, values in series:
                for key, val in list(values.items()):
                    if val == 0:
                        values.pop(key)
        return data

    def collect_by_release(group, aggregate, event):
        aggregate = aggregate if aggregate is not None else {}
        release = event.get_tag("sentry:release")
        if not release:
            return aggregate
        release = GroupRelease.objects.get(
            group_id=group.id,
            environment=event.data["environment"],
            release_id=Release.objects.get(
                organization_id=project.organization_id, version=release
            ).id,
        ).id
        aggregate[release] = aggregate.get(release, 0) + 1
        return aggregate

    items = {}
    for i in [source.id, destination.id]:
        items[i] = list(GroupRelease.objects.filter(group_id=i).values_list("id", flat=True))

    time_series = strip_zeroes(
        tsdb.get_frequency_series(
            tsdb.models.frequent_releases_by_group,
            items,
            now - timedelta(seconds=rollup_duration),
            time_from_now(17),
            rollup_duration,
        )
    )

    assert_series_contains(
        get_expected_series_values(
            rollup_duration,
            list(events.values())[1],
            functools.partial(collect_by_release, source),
        ),
        time_series[source.id],
        {},
    )

    assert_series_contains(
        get_expected_series_values(
            rollup_duration,
            list(events.values())[0] + list(events.values())[2],
            functools.partial(collect_by_release, destination),
        ),
        time_series[destination.id],
        {},
    )

    items = {}
    for i in [source.id, destination.id]:
        items[i] = list(Environment.objects.all().values_list("id", flat=True))

    time_series = strip_zeroes(
        tsdb.get_frequency_series(
            tsdb.models.frequent_environments_by_group,
            items,
            now - timedelta(seconds=rollup_duration),
            time_from_now(17),
            rollup_duration,
        )
    )

    def collect_by_environment(aggregate, event):
        aggregate = aggregate if aggregate is not None else {}
        environment = Environment.objects.get(
            organization_id=project.organization_id, name=event.data["environment"]
        ).id
        aggregate[environment] = aggregate.get(environment, 0) + 1
        return aggregate

    assert_series_contains(
        get_expected_series_values(
            rollup_duration, list(events.values())[1], collect_by_environment
        ),
        time_series[source.id],
        {},
    )

    assert_series_contains(
        get_expected_series_values(
            rollup_duration,
            list(events.values())[0] + list(events.values())[2],
            collect_by_environment,
        ),
        time_series[destination.id],
        {},
    )

    source_similar_items = features.compare(source)
    assert source_similar_items[0] == (
        source.id,
        {
            "exception:message:character-shingles": None,
            "exception:stacktrace:application-chunks": None,
            "exception:stacktrace:pairs": None,
            "message:message:character-shingles": 1.0,
        },
    )
    assert source_similar_items[1][0] == destination.id
    assert source_similar_items[1][1]["message:message:character-shingles"] < 1.0

    destination_similar_items = features.compare(destination)
    assert destination_similar_items[0] == (
        destination.id,
        {
            "exception:message:character-shingles": None,
            "exception:stacktrace:application-chunks": None,
            "exception:stacktrace:pairs": None,
            "message:message:character-shingles": 1.0,
        },
    )
    assert destination_similar_items[1][0] == source.id
    assert destination_similar_items[1][1]["message:message:character-shingles"] < 1.0
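# The variant above brackets both merge and unmerge with an eventstream state
# token: start_* returns the token, the task runs inside self.tasks(), and
# end_* consumes it. A hypothetical context manager making that shape
# explicit (this wrapper is not part of Sentry; only the start_unmerge and
# end_unmerge calls mirror the test above):

from contextlib import contextmanager


@contextmanager
def unmerging(eventstream, project_id, fingerprints, source_id, destination_id):
    # Pair the start/end calls so the state token cannot be dropped.
    state = eventstream.start_unmerge(project_id, fingerprints, source_id, destination_id)
    try:
        yield state
    finally:
        eventstream.end_unmerge(state)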
def test_unmerge(self, mock_eventstream):
    eventstream_state = object()
    mock_eventstream.start_unmerge = Mock(return_value=eventstream_state)

    def shift(i):
        return timedelta(seconds=1 << i)

    now = timezone.now().replace(microsecond=0) - shift(16)

    project = self.create_project()
    source = self.create_group(project)

    sequence = itertools.count(0)
    tag_values = itertools.cycle(['red', 'green', 'blue'])
    user_values = itertools.cycle([
        {'id': 1},
        {'id': 2},
    ])

    for environment in ('production', ''):
        EnvironmentProject.objects.create(
            environment=Environment.objects.create(
                organization_id=project.organization_id,
                name=environment,
            ),
            project=project,
        )

    def create_message_event(template, parameters, environment, release):
        i = next(sequence)

        event_id = uuid.UUID(
            fields=(i, 0x0, 0x1000, 0x80, 0x80, 0x808080808080),
        ).hex

        tags = [['color', next(tag_values)]]

        if environment:
            tags.append(['environment', environment])

        if release:
            tags.append(['sentry:release', release])

        event = Event.objects.create(
            project_id=project.id,
            group_id=source.id,
            event_id=event_id,
            message='%s' % (id, ),
            datetime=now + shift(i),
            data={
                'environment': environment,
                'type': 'default',
                'metadata': {
                    'title': template % parameters,
                },
                'logentry': {
                    'message': template,
                    'params': parameters,
                    'formatted': template % parameters,
                },
                'user': next(user_values),
                'tags': tags,
            },
        )

        with self.tasks():
            Group.objects.add_tags(
                source,
                Environment.objects.get(
                    organization_id=project.organization_id,
                    name=environment,
                ),
                tags=event.get_tags(),
            )

        EventMapping.objects.create(
            project_id=project.id,
            group_id=source.id,
            event_id=event_id,
            date_added=event.datetime,
        )

        UserReport.objects.create(
            project_id=project.id,
            group_id=source.id,
            event_id=event_id,
            name='Log Hat',
            email='*****@*****.**',
            comments='Quack',
        )

        if release:
            Release.get_or_create(
                project=project,
                version=event.get_tag('sentry:release'),
                date_added=event.datetime,
            )

        features.record([event])

        return event

    events = OrderedDict()

    for event in (create_message_event('This is message #%s.', i,
                                       environment='production',
                                       release='version') for i in xrange(10)):
        events.setdefault(get_fingerprint(event), []).append(event)

    for event in (create_message_event('This is message #%s!', i,
                                       environment='production',
                                       release='version') for i in xrange(10, 16)):
        events.setdefault(get_fingerprint(event), []).append(event)

    event = create_message_event('This is message #%s!', 17, environment='', release=None)
    events.setdefault(get_fingerprint(event), []).append(event)

    assert len(events) == 2
    assert sum(map(len, events.values())) == 17

    # XXX: This is super contrived considering that it doesn't actually go
    # through the event pipeline, but them's the breaks, eh?
    for fingerprint in events.keys():
        GroupHash.objects.create(
            project=project,
            group=source,
            hash=fingerprint,
        )

    production_environment = Environment.objects.get(
        organization_id=project.organization_id,
        name='production',
    )

    assert set([
        (gtk.key, gtk.values_seen)
        for gtk in tagstore.get_group_tag_keys(
            source.project_id, source.id, production_environment.id)
    ]) == set([
        (u'color', 3),
        (u'environment', 1),
        (u'sentry:release', 1),
    ])

    if settings.SENTRY_TAGSTORE.startswith('sentry.tagstore.v2'):
        assert set([
            (gtv.key, gtv.value, gtv.times_seen,
             Environment.objects.get(pk=gtv._key.environment_id).name)
            for gtv in GroupTagValue.objects.filter(
                project_id=source.project_id,
                group_id=source.id,
            ).exclude(_key__environment_id=0)
        ]) == set([
            ('color', 'red', 6, 'production'),
            ('sentry:release', 'version', 16, 'production'),
            ('color', 'blue', 5, 'production'),
            ('color', 'green', 5, 'production'),
            ('environment', 'production', 16, 'production'),
            ('color', 'green', 1, ''),
        ])
    else:
        assert set([
            (gtv.key, gtv.value, gtv.times_seen)
            for gtv in GroupTagValue.objects.filter(
                project_id=source.project_id,
                group_id=source.id,
            )
        ]) == set([
            (u'color', u'red', 6),
            (u'color', u'green', 6),
            (u'color', u'blue', 5),
            (u'environment', u'production', 16),
            (u'sentry:release', u'version', 16),
        ])

    assert features.compare(source) == [
        (source.id, {
            'exception:message:character-shingles': None,
            'exception:stacktrace:application-chunks': None,
            'exception:stacktrace:pairs': None,
            'message:message:character-shingles': 1.0,
        }),
    ]

    with self.tasks():
        unmerge.delay(
            source.project_id,
            source.id,
            None,
            [events.keys()[1]],
            None,
            batch_size=5,
        )

    assert list(
        Group.objects.filter(id=source.id).values_list(
            'times_seen',
            'first_seen',
            'last_seen',
        )
    ) == [(10, now + shift(0), now + shift(9))]

    source_activity = Activity.objects.get(
        group_id=source.id,
        type=Activity.UNMERGE_SOURCE,
    )

    destination = Group.objects.get(
        id=source_activity.data['destination_id'],
    )

    mock_eventstream.start_unmerge.assert_called_once_with(
        source.project_id, [events.keys()[1]], source.id, destination.id
    )

    mock_eventstream.end_unmerge.assert_called_once_with(eventstream_state)

    assert list(
        Group.objects.filter(id=destination.id).values_list(
            'times_seen',
            'first_seen',
            'last_seen',
        )
    ) == [(7, now + shift(10), now + shift(16))]

    assert source_activity.data == {
        'destination_id': destination.id,
        'fingerprints': [events.keys()[1]],
    }

    assert source.id != destination.id
    assert source.project == destination.project

    assert Activity.objects.get(
        group_id=destination.id,
        type=Activity.UNMERGE_DESTINATION,
    ).data == {
        'source_id': source.id,
        'fingerprints': [events.keys()[1]],
    }

    source_event_event_ids = map(
        lambda event: event.event_id,
        events.values()[0],
    )

    assert source.event_set.count() == 10

    assert set(
        EventMapping.objects.filter(
            group_id=source.id,
        ).values_list('event_id', flat=True)
    ) == set(source_event_event_ids)

    assert set(
        UserReport.objects.filter(
            group_id=source.id,
        ).values_list('event_id', flat=True)
    ) == set(source_event_event_ids)

    assert set(
        GroupHash.objects.filter(
            group_id=source.id,
        ).values_list('hash', flat=True)
    ) == set([events.keys()[0]])

    assert set(
        GroupRelease.objects.filter(
            group_id=source.id,
        ).values_list('environment', 'first_seen', 'last_seen')
    ) == set([
        (u'production', now + shift(0), now + shift(9)),
    ])

    assert set([
        (gtk.key, gtk.values_seen)
        for gtk in tagstore.get_group_tag_keys(
            source.project_id, source.id, production_environment.id)
    ]) == set([
        (u'color', 3),
        (u'environment', 1),
        (u'sentry:release', 1),
    ])

    if settings.SENTRY_TAGSTORE.startswith('sentry.tagstore.v2'):
        env_filter = {'_key__environment_id': production_environment.id}
    else:
        env_filter = {}

    assert set([
        (gtv.key, gtv.value, gtv.times_seen, gtv.first_seen, gtv.last_seen)
        for gtv in GroupTagValue.objects.filter(
            project_id=source.project_id,
            group_id=source.id,
            **env_filter
        )
    ]) == set([
        (u'color', u'red', 4, now + shift(0), now + shift(9)),
        (u'color', u'green', 3, now + shift(1), now + shift(7)),
        (u'color', u'blue', 3, now + shift(2), now + shift(8)),
        (u'environment', u'production', 10, now + shift(0), now + shift(9)),
        (u'sentry:release', u'version', 10, now + shift(0), now + shift(9)),
    ])

    destination_event_event_ids = map(
        lambda event: event.event_id,
        events.values()[1],
    )

    assert destination.event_set.count() == 7

    assert set(
        EventMapping.objects.filter(
            group_id=destination.id,
        ).values_list('event_id', flat=True)
    ) == set(destination_event_event_ids)

    assert set(
        UserReport.objects.filter(
            group_id=destination.id,
        ).values_list('event_id', flat=True)
    ) == set(destination_event_event_ids)

    assert set(
        GroupHash.objects.filter(
            group_id=destination.id,
        ).values_list('hash', flat=True)
    ) == set([events.keys()[1]])

    assert set(
        GroupRelease.objects.filter(
            group_id=destination.id,
        ).values_list('environment', 'first_seen', 'last_seen')
    ) == set([
        (u'production', now + shift(10), now + shift(15)),
    ])

    assert set([
        (gtk.key, gtk.values_seen)
        for gtk in tagstore.get_group_tag_keys(
            source.project_id, source.id, production_environment.id)
    ]) == set([
        (u'color', 3),
        (u'environment', 1),
        (u'sentry:release', 1),
    ])

    if settings.SENTRY_TAGSTORE.startswith('sentry.tagstore.v2'):
        assert set([
            (gtv.key, gtv.value, gtv.times_seen, gtv.first_seen, gtv.last_seen)
            for gtv in GroupTagValue.objects.filter(
                project_id=destination.project_id,
                group_id=destination.id,
                **env_filter
            )
        ]) == set([
            (u'color', u'red', 2, now + shift(12), now + shift(15)),
            (u'color', u'green', 2, now + shift(10), now + shift(13)),
            (u'color', u'blue', 2, now + shift(11), now + shift(14)),
            (u'environment', u'production', 6, now + shift(10), now + shift(15)),
            (u'sentry:release', u'version', 6, now + shift(10), now + shift(15)),
        ])
    else:
        assert set([
            (gtv.key, gtv.value, gtv.times_seen, gtv.first_seen, gtv.last_seen)
            for gtv in GroupTagValue.objects.filter(
                project_id=destination.project_id,
                group_id=destination.id,
                **env_filter
            )
        ]) == set([
            (u'color', u'red', 2, now + shift(12), now + shift(15)),
            (u'color', u'green', 3, now + shift(10), now + shift(16)),
            (u'color', u'blue', 2, now + shift(11), now + shift(14)),
            (u'environment', u'production', 6, now + shift(10), now + shift(15)),
            (u'sentry:release', u'version', 6, now + shift(10), now + shift(15)),
        ])

    rollup_duration = 3600

    time_series = tsdb.get_range(
        tsdb.models.group,
        [source.id, destination.id],
        now - timedelta(seconds=rollup_duration),
        now + shift(15),
        rollup_duration,
    )

    environment_time_series = tsdb.get_range(
        tsdb.models.group,
        [source.id, destination.id],
        now - timedelta(seconds=rollup_duration),
        now + shift(15),
        rollup_duration,
        environment_ids=[production_environment.id],
    )

    def get_expected_series_values(rollup, events, function=None):
        if function is None:
            def function(aggregate, event):
                return (aggregate if aggregate is not None else 0) + 1

        expected = {}
        for event in events:
            k = float((to_timestamp(event.datetime) // rollup_duration) * rollup_duration)
            expected[k] = function(expected.get(k), event)

        return expected

    def assert_series_contains(expected, actual, default=0):
        actual = dict(actual)

        for key, value in expected.items():
            assert actual.get(key, 0) == value

        for key in set(actual.keys()) - set(expected.keys()):
            assert actual.get(key, 0) == default

    for series in [time_series, environment_time_series]:
        assert_series_contains(
            get_expected_series_values(rollup_duration, events.values()[0]),
            series[source.id],
            0,
        )

        assert_series_contains(
            get_expected_series_values(rollup_duration, events.values()[1][:-1]),
            series[destination.id],
            0,
        )

    time_series = tsdb.get_distinct_counts_series(
        tsdb.models.users_affected_by_group,
        [source.id, destination.id],
        now - timedelta(seconds=rollup_duration),
        now + shift(16),
        rollup_duration,
    )

    environment_time_series = tsdb.get_distinct_counts_series(
        tsdb.models.users_affected_by_group,
        [source.id, destination.id],
        now - timedelta(seconds=rollup_duration),
        now + shift(16),
        rollup_duration,
        environment_id=production_environment.id,
    )

    def collect_by_user_tag(aggregate, event):
        aggregate = aggregate if aggregate is not None else set()
        aggregate.add(
            get_event_user_from_interface(
                event.data['user'],
            ).tag_value,
        )
        return aggregate

    for series in [time_series, environment_time_series]:
        assert_series_contains(
            {
                timestamp: len(values)
                for timestamp, values in get_expected_series_values(
                    rollup_duration,
                    events.values()[0],
                    collect_by_user_tag,
                ).items()
            },
            series[source.id],
        )

        assert_series_contains(
            {
                timestamp: len(values)
                for timestamp, values in get_expected_series_values(
                    rollup_duration,
                    events.values()[1],
                    collect_by_user_tag,
                ).items()
            },
            time_series[destination.id],
        )

    time_series = tsdb.get_most_frequent_series(
        tsdb.models.frequent_releases_by_group,
        [source.id, destination.id],
        now - timedelta(seconds=rollup_duration),
        now + shift(16),
        rollup_duration,
    )

    def collect_by_release(group, aggregate, event):
        aggregate = aggregate if aggregate is not None else {}
        release = event.get_tag('sentry:release')
        if not release:
            return aggregate
        release = GroupRelease.objects.get(
            group_id=group.id,
            environment=event.data['environment'],
            release_id=Release.objects.get(
                organization_id=project.organization_id,
                version=release,
            ).id,
        ).id
        aggregate[release] = aggregate.get(release, 0) + 1
        return aggregate

    assert_series_contains(
        get_expected_series_values(
            rollup_duration,
            events.values()[0],
            functools.partial(collect_by_release, source),
        ),
        time_series[source.id],
        {},
    )

    assert_series_contains(
        get_expected_series_values(
            rollup_duration,
            events.values()[1],
            functools.partial(collect_by_release, destination),
        ),
        time_series[destination.id],
        {},
    )

    time_series = tsdb.get_most_frequent_series(
        tsdb.models.frequent_environments_by_group,
        [source.id, destination.id],
        now - timedelta(seconds=rollup_duration),
        now + shift(16),
        rollup_duration,
    )

    def collect_by_environment(aggregate, event):
        aggregate = aggregate if aggregate is not None else {}
        environment = Environment.objects.get(
            organization_id=project.organization_id,
            name=event.data['environment'],
        ).id
        aggregate[environment] = aggregate.get(environment, 0) + 1
        return aggregate

    assert_series_contains(
        get_expected_series_values(
            rollup_duration,
            events.values()[0],
            collect_by_environment,
        ),
        time_series[source.id],
        {},
    )

    assert_series_contains(
        get_expected_series_values(
            rollup_duration,
            events.values()[1],
            collect_by_environment,
        ),
        time_series[destination.id],
        {},
    )

    source_similar_items = features.compare(source)
    assert source_similar_items[0] == (source.id, {
        'exception:message:character-shingles': None,
        'exception:stacktrace:application-chunks': None,
        'exception:stacktrace:pairs': None,
        'message:message:character-shingles': 1.0,
    })
    assert source_similar_items[1][0] == destination.id
    assert source_similar_items[1][1]['message:message:character-shingles'] < 1.0

    destination_similar_items = features.compare(destination)
    assert destination_similar_items[0] == (
        destination.id,
        {
            'exception:message:character-shingles': None,
            'exception:stacktrace:application-chunks': None,
            'exception:stacktrace:pairs': None,
            'message:message:character-shingles': 1.0,
        },
    )
    assert destination_similar_items[1][0] == source.id
    assert destination_similar_items[1][1]['message:message:character-shingles'] < 1.0