def test_basic_response(self):
    """
    Runs an AggregateReport for NamespaceEdits over self.cohort through its
    task and checks the individual, average and standard deviation results.
    """
    namespace_edits = metric_classes['NamespaceEdits'](
        name='NamespaceEdits',
        namespaces=[0, 1, 2],
        start_date='2013-01-01 00:00:00',
        end_date='2013-01-02 00:00:00',
    )
    # enable individual results plus the sum, average and
    # standard deviation aggregates
    report_options = dict(
        individualResults=True,
        aggregateResults=True,
        aggregateSum=True,
        aggregateAverage=True,
        aggregateStandardDeviation=True,
    )
    report = AggregateReport(
        namespace_edits,
        self.cohort,
        report_options,
        user_id=self.owner_user_id,
    )
    outcome = report.task.delay(report).get()

    first_editor = outcome[Aggregation.IND][self.editor(0)]
    assert_equals(first_editor['edits'], 2)

    average_edits = outcome[Aggregation.AVG]['edits']
    assert_equals(average_edits, r(1.0))

    deviation_of_edits = outcome[Aggregation.STD]['edits']
    assert_equals(deviation_of_edits, r(1.0))
def test_aggregate_empty_results(self):
    '''
    Tests what happens when no users are returned for the initial
    metric run so there are no users to aggregate
    '''
    self.create_wiki_cohort()

    metric = metric_classes['NamespaceEdits'](
        name='NamespaceEdits',
        namespaces=[0, 1, 2],
        start_date='2010-01-01 00:00:00',
        end_date='2010-01-02 00:00:00',
    )
    options = {
        'individualResults': True,
        'aggregateResults': True,
        'aggregateSum': True,
        'aggregateAverage': True,
        'aggregateStandardDeviation': True,
    }

    ar = AggregateReport(
        metric,
        self.basic_wiki_cohort,
        options,
        user_id=self.basic_wiki_cohort_owner,
    )
    result = ar.task.delay(ar).get()

    # materialise the keys before comparing: a keys() view object never
    # equals a list, so the bare comparison only works where keys()
    # already returns a list
    assert_equals(list(result[Aggregation.IND].keys()), [])

    # with nobody to aggregate over, every aggregate is expected to be zero
    assert_equals(result[Aggregation.SUM]['edits'], r(0))
    assert_equals(result[Aggregation.AVG]['edits'], r(0))
    assert_equals(result[Aggregation.STD]['edits'], r(0))
def test_finish(self):
    """
    Feeds hand-written per-user results straight into AggregateReport.finish
    and checks the aggregates it returns, first for a sub-metric with integer
    values and then for one with fractional values.
    """
    metric = metric_classes['NamespaceEdits'](
        name='NamespaceEdits',
        namespaces=[0, 1, 2],
        start_date='2013-05-01 00:00:00',
        end_date='2013-09-01 00:00:00',
    )
    options = dict.fromkeys(
        [
            'individualResults',
            'aggregateResults',
            'aggregateSum',
            'aggregateAverage',
            'aggregateStandardDeviation',
        ],
        True
    )
    report = AggregateReport(
        metric,
        self.cohort,
        options,
        user_id=self.owner_user_id,
    )

    # four entries (one of them keyed by None), so the expected
    # average below is 5 / 4
    edit_counts = {
        1: {'edits': 2},
        2: {'edits': 3},
        3: {'edits': 0},
        None: {'edits': 0},
    }
    finished = report.finish([edit_counts])
    assert_equals(finished[Aggregation.SUM]['edits'], 5)
    assert_equals(finished[Aggregation.AVG]['edits'], r(1.25))

    fractional_values = {
        1: {'other_sub_metric': r(2.3)},
        2: {'other_sub_metric': r(3.4)},
        3: {'other_sub_metric': r(0.0)},
        None: {'other_sub_metric': 0},
    }
    finished = report.finish([fractional_values])
    assert_equals(finished[Aggregation.SUM]['other_sub_metric'], r(5.7))
    assert_equals(finished[Aggregation.AVG]['other_sub_metric'], r(1.425))
    assert_equals(finished[Aggregation.STD]['other_sub_metric'], r(1.4771))
def test_finish(self):
    """
    Passes two named child results to AggregateReport.finish and checks the
    sum, average and standard deviation stored under the report's result_key.
    """
    metric = metric_classes['NamespaceEdits'](
        name='NamespaceEdits',
        namespaces=[0, 1, 2],
        start_date='2013-05-01 00:00:00',
        end_date='2013-09-01 00:00:00',
    )
    # switch on individual results and all three aggregates
    flags = dict(
        individual=True,
        aggregate=True,
        aggregate_sum=True,
        aggregate_average=True,
        aggregate_std_deviation=True,
    )
    report = AggregateReport(
        self.cohort, metric, user_id=self.owner_user_id, **flags
    )

    namespace_edits_result = {
        'namespace edits - fake cohort': {
            1: {'edits': 2},
            2: {'edits': 3},
            3: {'edits': 0},
            None: {'edits': 0},
        },
    }
    other_metric_result = {
        'some other metric - fake cohort': {
            1: {'other_sub_metric': r(2.3)},
            2: {'other_sub_metric': r(3.4)},
            3: {'other_sub_metric': r(0.0)},
            None: {'other_sub_metric': 0},
        },
    }
    finished = report.finish([namespace_edits_result, other_metric_result])

    # everything asserted below lives under the report's own result key
    aggregates = finished[report.result_key]

    totals = aggregates[Aggregation.SUM]
    assert_equals(totals['edits'], 5)
    assert_equals(totals['other_sub_metric'], r(5.7))

    means = aggregates[Aggregation.AVG]
    assert_equals(means['edits'], r(1.25))
    assert_equals(means['other_sub_metric'], r(1.425))

    assert_equals(
        aggregates[Aggregation.STD]['other_sub_metric'],
        r(1.4771)
    )
def test_basic_response(self):
    """
    Runs an AggregateReport through its task, looks up the persisted
    report's result_key and checks the values stored under that key.
    """
    metric = metric_classes['NamespaceEdits'](
        name='NamespaceEdits',
        namespaces=[0, 1, 2],
        start_date='2013-01-01 00:00:00',
        end_date='2013-01-02 00:00:00',
    )
    # switch on individual results and all three aggregates
    flags = dict(
        individual=True,
        aggregate=True,
        aggregate_sum=True,
        aggregate_average=True,
        aggregate_std_deviation=True,
    )
    report = AggregateReport(
        self.cohort, metric, user_id=self.owner_user_id, **flags
    )
    result = report.task.delay(report).get()
    self.session.commit()

    # the task's results are keyed by the result_key of the
    # PersistentReport row whose id matches report.persistent_id
    persisted = (
        self.session.query(PersistentReport)
        .filter(PersistentReport.id == report.persistent_id)
        .one()
    )
    aggregate_key = persisted.result_key

    individual = result[aggregate_key][Aggregation.IND][0]
    assert_equals(individual[self.editors[0].user_id]['edits'], 2)
    assert_equals(result[aggregate_key][Aggregation.AVG]['edits'], r(1.0))
    assert_equals(result[aggregate_key][Aggregation.STD]['edits'], r(1.0))
def test_timeseries_day(self):
    """
    Runs a daily-timeseries NamespaceEdits report and checks that the
    individual and aggregate results are broken down per day.
    """
    metric = NamespaceEdits(
        namespaces=[0],
        start_date='2012-12-31 00:00:00',
        end_date='2013-01-03 00:00:00',
        timeseries=TimeseriesChoices.DAY,
    )
    options = dict(
        individualResults=True,
        aggregateResults=True,
        aggregateSum=True,
        aggregateAverage=True,
        aggregateStandardDeviation=True,
    )
    report = AggregateReport(
        metric,
        self.cohort,
        options,
        user_id=self.owner_user_id,
    )
    results = report.task.delay(report).get()
    self.session.commit()

    # the three daily buckets every expected series below is keyed by
    days = (
        '2012-12-31 00:00:00',
        '2013-01-01 00:00:00',
        '2013-01-02 00:00:00',
    )

    assert_equals(
        results[Aggregation.IND][self.editor(0)]['edits'],
        dict(zip(days, (1, 2, 0)))
    )
    assert_equals(
        results[Aggregation.SUM]['edits'],
        dict(zip(days, (1, 5, 2)))
    )
    assert_equals(
        results[Aggregation.AVG]['edits'],
        dict(zip(days, (r(0.25), r(1.25), r(0.5))))
    )
    assert_equals(
        results[Aggregation.STD]['edits'],
        dict(zip(days, (r(0.4330), r(0.4330), r(0.8660))))
    )
def test_timeseries_day(self):
    """
    Runs a daily-timeseries NamespaceEdits report, looks up the persisted
    report's result_key and checks the per-day values stored under it.
    """
    metric = NamespaceEdits(
        namespaces=[0],
        start_date='2012-12-31 00:00:00',
        end_date='2013-01-03 00:00:00',
        timeseries=TimeseriesChoices.DAY,
    )
    # switch on individual results and all three aggregates
    flags = dict(
        individual=True,
        aggregate=True,
        aggregate_sum=True,
        aggregate_average=True,
        aggregate_std_deviation=True,
    )
    report = AggregateReport(
        self.cohort, metric, user_id=self.owner_user_id, **flags
    )
    results = report.task.delay(report).get()
    self.session.commit()

    # results are keyed by the result_key of the PersistentReport row
    # whose id matches report.persistent_id
    persisted = (
        self.session.query(PersistentReport)
        .filter(PersistentReport.id == report.persistent_id)
        .one()
    )
    aggregate_key = persisted.result_key

    first_day = '2012-12-31 00:00:00'
    second_day = '2013-01-01 00:00:00'
    third_day = '2013-01-02 00:00:00'

    individual = results[aggregate_key][Aggregation.IND][0]
    assert_equals(
        individual[self.editors[0].user_id]['edits'],
        {first_day: 1, second_day: 2, third_day: 0}
    )
    assert_equals(
        results[aggregate_key][Aggregation.SUM]['edits'],
        {first_day: 1, second_day: 5, third_day: 2}
    )
    assert_equals(
        results[aggregate_key][Aggregation.AVG]['edits'],
        {first_day: r(0.25), second_day: r(1.25), third_day: r(0.5)}
    )
    assert_equals(
        results[aggregate_key][Aggregation.STD]['edits'],
        {first_day: r(0.4330), second_day: r(0.4330), third_day: r(0.8660)}
    )
def test_basic_response(self):
    """
    Executes an AggregateReport for NamespaceEdits via its task and verifies
    the first editor's count plus the average and standard deviation.
    """
    metric_class = metric_classes['NamespaceEdits']
    metric = metric_class(
        name='NamespaceEdits',
        namespaces=[0, 1, 2],
        start_date='2013-01-01 00:00:00',
        end_date='2013-01-02 00:00:00',
    )
    # every option this test passes is simply turned on
    options = dict.fromkeys(
        [
            'individualResults',
            'aggregateResults',
            'aggregateSum',
            'aggregateAverage',
            'aggregateStandardDeviation',
        ],
        True
    )
    aggregate_report = AggregateReport(
        metric,
        self.cohort,
        options,
        user_id=self.owner_user_id,
    )
    pending = aggregate_report.task.delay(aggregate_report)
    result = pending.get()

    by_user = result[Aggregation.IND]
    assert_equals(by_user[self.editor(0)]['edits'], 2)
    assert_equals(result[Aggregation.AVG]['edits'], r(1.0))
    assert_equals(result[Aggregation.STD]['edits'], r(1.0))
def test_timeseries_day(self):
    """
    Checks that a NamespaceEdits report with a daily timeseries yields
    per-day individual, sum, average and standard deviation results.
    """
    def per_day(first, second, third):
        # lay three values out over the three days covered by the metric
        return {
            '2012-12-31 00:00:00': first,
            '2013-01-01 00:00:00': second,
            '2013-01-02 00:00:00': third,
        }

    metric = NamespaceEdits(
        namespaces=[0],
        start_date='2012-12-31 00:00:00',
        end_date='2013-01-03 00:00:00',
        timeseries=TimeseriesChoices.DAY,
    )
    options = dict.fromkeys(
        [
            'individualResults',
            'aggregateResults',
            'aggregateSum',
            'aggregateAverage',
            'aggregateStandardDeviation',
        ],
        True
    )
    aggregate_report = AggregateReport(
        metric,
        self.cohort,
        options,
        user_id=self.owner_user_id,
    )
    results = aggregate_report.task.delay(aggregate_report).get()
    self.session.commit()

    first_editor = results[Aggregation.IND][self.editor(0)]
    assert_equals(first_editor['edits'], per_day(1, 2, 0))
    assert_equals(results[Aggregation.SUM]['edits'], per_day(1, 5, 2))
    assert_equals(
        results[Aggregation.AVG]['edits'],
        per_day(r(0.25), r(1.25), r(0.5))
    )
    assert_equals(
        results[Aggregation.STD]['edits'],
        per_day(r(0.4330), r(0.4330), r(0.8660))
    )
def test_finish_timeseries(self):
    """
    Feeds hand-written timeseries results into AggregateReport.finish and
    checks the per-date sum, average and standard deviation it returns.
    """
    def edits(on_date1, on_date2):
        # one user's 'edits' timeseries
        return {'edits': {'date1': on_date1, 'date2': on_date2}}

    def other(on_date3, on_date4):
        # one user's 'other_sub_metric' timeseries
        return {'other_sub_metric': {'date3': on_date3, 'date4': on_date4}}

    metric = NamespaceEdits(
        namespaces=[0],
        start_date='2012-12-31 00:00:00',
        end_date='2013-01-03 00:00:00',
        timeseries=TimeseriesChoices.DAY,
    )
    options = dict(
        individualResults=True,
        aggregateResults=True,
        aggregateSum=True,
        aggregateAverage=True,
        aggregateStandardDeviation=True,
    )
    report = AggregateReport(
        metric,
        self.cohort,
        options,
        user_id=self.owner_user_id,
    )

    # the None values of the last entry are expected to be left out:
    # the date1 average below is 1 / 3, not 1 / 4
    finished = report.finish([
        {
            1: edits(1, 2),
            2: edits(0, 1),
            3: edits(0, 0),
            None: edits(None, None),
        },
    ])
    assert_equals(
        finished[Aggregation.SUM]['edits'],
        {'date1': 1, 'date2': 3}
    )
    assert_equals(
        finished[Aggregation.AVG]['edits'],
        {'date1': r(0.3333), 'date2': r(1.0)}
    )
    assert_equals(
        finished[Aggregation.STD]['edits'],
        {'date1': r(0.4714), 'date2': r(0.8165)}
    )

    # here only users 1 and 2 carry values, so each average is
    # the date's total divided by two
    finished = report.finish([
        {
            1: other(r(2.3), 0),
            2: other(0, r(3.4)),
            3: other(None, None),
            None: other(None, None),
        },
    ])
    assert_equals(
        finished[Aggregation.SUM]['other_sub_metric'],
        {'date3': r(2.3), 'date4': r(3.4)}
    )
    assert_equals(
        finished[Aggregation.AVG]['other_sub_metric'],
        {'date3': r(1.15), 'date4': r(1.7)}
    )
    assert_equals(
        finished[Aggregation.STD]['other_sub_metric'],
        {'date3': r(1.15), 'date4': r(1.7)}
    )
def test_finish_timeseries(self):
    """
    Calls AggregateReport.finish with timeseries-shaped per-user results and
    verifies each aggregate date by date.
    """
    metric = NamespaceEdits(
        namespaces=[0],
        start_date='2012-12-31 00:00:00',
        end_date='2013-01-03 00:00:00',
        timeseries=TimeseriesChoices.DAY,
    )
    options = dict.fromkeys(
        [
            'individualResults',
            'aggregateResults',
            'aggregateSum',
            'aggregateAverage',
            'aggregateStandardDeviation',
        ],
        True
    )
    aggregate_report = AggregateReport(
        metric,
        self.cohort,
        options,
        user_id=self.owner_user_id,
    )

    # first pass: integer edit counts, with an all-None entry keyed by None
    edit_series = {
        1: {'edits': {'date1': 1, 'date2': 2}},
        2: {'edits': {'date1': 0, 'date2': 1}},
        3: {'edits': {'date1': 0, 'date2': 0}},
        None: {'edits': {'date1': None, 'date2': None}},
    }
    finished = aggregate_report.finish([edit_series])

    summed = finished[Aggregation.SUM]['edits']
    assert_equals(summed, {'date1': 1, 'date2': 3})
    averaged = finished[Aggregation.AVG]['edits']
    assert_equals(averaged, {'date1': r(0.3333), 'date2': r(1.0)})
    deviation = finished[Aggregation.STD]['edits']
    assert_equals(deviation, {'date1': r(0.4714), 'date2': r(0.8165)})

    # second pass: fractional values, two of the four entries all-None
    other_series = {
        1: {'other_sub_metric': {'date3': r(2.3), 'date4': 0}},
        2: {'other_sub_metric': {'date3': 0, 'date4': r(3.4)}},
        3: {'other_sub_metric': {'date3': None, 'date4': None}},
        None: {'other_sub_metric': {'date3': None, 'date4': None}},
    }
    finished = aggregate_report.finish([other_series])

    summed = finished[Aggregation.SUM]['other_sub_metric']
    assert_equals(summed, {'date3': r(2.3), 'date4': r(3.4)})
    averaged = finished[Aggregation.AVG]['other_sub_metric']
    assert_equals(averaged, {'date3': r(1.15), 'date4': r(1.7)})
    deviation = finished[Aggregation.STD]['other_sub_metric']
    assert_equals(deviation, {'date3': r(1.15), 'date4': r(1.7)})
def calculate(self, results_by_user, type_of_aggregate, average=None):
    """
    Calculates one type of aggregate by just iterating over the individual
    results.  Takes into account that results and aggregates may be split up
    by timeseries.  Also makes sure to ignore censored records when
    appropriate.

    Parameters
        results_by_user     : dictionary of individual results, one entry per
                              user; each entry maps a sub-metric name to
                              either a single value or, for timeseries, a
                              dictionary of values
        type_of_aggregate   : can be Aggregation.SUM, AVG, or STD; any other
                              value leaves every aggregate at 0
        average             : None by default but required when computing
                              STD; it is indexed the same way as the
                              returned aggregate

    Returns
        The aggregate specified, computed at the timeseries level
        if applicable
    """
    # TODO: the aggregate is still recomputed after every record; it could
    # be derived once from the running totals after the loops

    def new_totals():
        # running totals from which any of the aggregates can be derived
        return {
            'sum': Decimal(0.0),
            'square_diffs': Decimal(0.0),
            'count': 0,
        }

    def summarise(totals, current):
        # turn running totals into the requested aggregate; an unrecognised
        # aggregate type keeps whatever value is already stored
        if type_of_aggregate == Aggregation.SUM:
            return r(totals['sum'])
        if type_of_aggregate == Aggregation.AVG:
            return r(safe_average(totals['sum'], totals['count']))
        if type_of_aggregate == Aggregation.STD:
            return r(sqrt(safe_average(
                totals['square_diffs'], totals['count']
            )))
        return current

    aggregation = dict()
    helper = dict()

    for user_results in results_by_user.values():
        # censorship is a property of the whole user record, so work it
        # out once per user instead of once per key
        value_is_not_censored = (
            CENSORED not in user_results or user_results[CENSORED] != 1
        )

        for key, value in user_results.items():
            # the CENSORED key indicates that this user has censored
            # results for this metric.  It is not aggregate-able
            if key == CENSORED:
                continue

            # handle timeseries aggregation
            if isinstance(value, dict):
                if key not in aggregation:
                    aggregation[key] = OrderedDict()
                    helper[key] = dict()

                for subkey, point in value.items():
                    # subkeys are registered even when the value is not
                    # counted, so they still show up in the output
                    if subkey not in aggregation[key]:
                        aggregation[key][subkey] = 0
                        helper[key][subkey] = new_totals()
                    totals = helper[key][subkey]

                    if value_is_not_censored and point is not None:
                        totals['sum'] += Decimal(point)
                        totals['count'] += 1
                        if type_of_aggregate == Aggregation.STD:
                            diff = Decimal(point) - average[key][subkey]
                            totals['square_diffs'] += Decimal(pow(diff, 2))

                    aggregation[key][subkey] = summarise(
                        totals, aggregation[key][subkey]
                    )

            # handle normal aggregation
            else:
                if key not in aggregation:
                    aggregation[key] = 0
                    helper[key] = new_totals()
                totals = helper[key]

                if value_is_not_censored and value is not None:
                    totals['sum'] += Decimal(value)
                    totals['count'] += 1
                    if type_of_aggregate == Aggregation.STD:
                        diff = Decimal(value) - average[key]
                        totals['square_diffs'] += Decimal(pow(diff, 2))

                aggregation[key] = summarise(totals, aggregation[key])

    return aggregation
def safe_average(cummulative_sum, count):
    """
    Divides cummulative_sum by count and passes the quotient through r().
    When count is zero no division is attempted and a plain 0 is returned.
    """
    if count == 0:
        return 0
    return r(cummulative_sum / count)
def test_finish_timeseries(self):
    """
    Passes two named timeseries child results to AggregateReport.finish and
    checks the per-date aggregates stored under the report's result_key.
    """
    metric = NamespaceEdits(
        namespaces=[0],
        start_date='2012-12-31 00:00:00',
        end_date='2013-01-03 00:00:00',
        timeseries=TimeseriesChoices.DAY,
    )
    # switch on individual results and all three aggregates
    flags = dict(
        individual=True,
        aggregate=True,
        aggregate_sum=True,
        aggregate_average=True,
        aggregate_std_deviation=True,
    )
    report = AggregateReport(
        self.cohort, metric, user_id=self.owner_user_id, **flags
    )

    namespace_edits_result = {
        'namespace edits - fake cohort': {
            1: {'edits': {'date1': 1, 'date2': 2}},
            2: {'edits': {'date1': 0, 'date2': 1}},
            3: {'edits': {'date1': 0, 'date2': 0}},
            None: {'edits': {'date1': None, 'date2': None}},
        },
    }
    other_metric_result = {
        'some other metric - fake cohort': {
            1: {'other_sub_metric': {'date3': r(2.3), 'date4': 0}},
            2: {'other_sub_metric': {'date3': 0, 'date4': r(3.4)}},
            3: {'other_sub_metric': {'date3': None, 'date4': None}},
            None: {'other_sub_metric': {'date3': None, 'date4': None}},
        },
    }
    finished = report.finish([namespace_edits_result, other_metric_result])

    # everything asserted below lives under the report's own result key
    aggregates = finished[report.result_key]

    totals = aggregates[Aggregation.SUM]
    assert_equals(totals['edits'], {'date1': 1, 'date2': 3})
    assert_equals(
        totals['other_sub_metric'],
        {'date3': r(2.3), 'date4': r(3.4)}
    )

    means = aggregates[Aggregation.AVG]
    assert_equals(means['edits'], {'date1': r(0.3333), 'date2': r(1.0)})
    assert_equals(
        means['other_sub_metric'],
        {'date3': r(1.15), 'date4': r(1.7)}
    )

    deviations = aggregates[Aggregation.STD]
    assert_equals(
        deviations['edits'],
        {'date1': r(0.4714), 'date2': r(0.8165)}
    )
    assert_equals(
        deviations['other_sub_metric'],
        {'date3': r(1.15), 'date4': r(1.7)}
    )
def test_finish(self):
    """
    Calls AggregateReport.finish directly with hand-written per-user values
    and verifies the sum, average and standard deviation that come back.
    """
    metric_class = metric_classes['NamespaceEdits']
    metric = metric_class(
        name='NamespaceEdits',
        namespaces=[0, 1, 2],
        start_date='2013-05-01 00:00:00',
        end_date='2013-09-01 00:00:00',
    )
    options = dict(
        individualResults=True,
        aggregateResults=True,
        aggregateSum=True,
        aggregateAverage=True,
        aggregateStandardDeviation=True,
    )
    aggregate_report = AggregateReport(
        metric,
        self.cohort,
        options,
        user_id=self.owner_user_id,
    )

    # (user, edits) pairs; the last entry is keyed by None
    edit_counts = dict(
        (user, {'edits': count})
        for user, count in [(1, 2), (2, 3), (3, 0), (None, 0)]
    )
    finished = aggregate_report.finish([edit_counts])

    total_edits = finished[Aggregation.SUM]['edits']
    assert_equals(total_edits, 5)
    average_edits = finished[Aggregation.AVG]['edits']
    assert_equals(average_edits, r(1.25))

    # same shape as above, this time with fractional values
    other_values = dict(
        (user, {'other_sub_metric': amount})
        for user, amount in [
            (1, r(2.3)),
            (2, r(3.4)),
            (3, r(0.0)),
            (None, 0),
        ]
    )
    finished = aggregate_report.finish([other_values])

    assert_equals(finished[Aggregation.SUM]['other_sub_metric'], r(5.7))
    assert_equals(finished[Aggregation.AVG]['other_sub_metric'], r(1.425))
    assert_equals(finished[Aggregation.STD]['other_sub_metric'], r(1.4771))