def test_first_run_of_recurrent_report_happens(self):
        """
        A recurrent report must execute its first run as soon as it is
        scheduled rather than waiting for the scheduled run.
        """
        cohort_options = {
            'id': self.cohort.id,
            'name': self.cohort.name,
        }
        metric_options = {
            'name': 'NamespaceEdits',
            'namespaces': [0, 1, 2],
            'start_date': '2013-01-01 00:00:00',
            'end_date': '2013-01-03 00:00:00',
            'individualResults': True,
            'aggregateResults': False,
            'aggregateSum': False,
            'aggregateAverage': False,
            'aggregateStandardDeviation': False,
        }
        report_request = {
            'name': 'Edits - test',
            'cohort': cohort_options,
            'metric': metric_options,
            'recurrent': True,
            'public': True
        }

        recurrent_report = RunReport(report_request, user_id=self.owner_user_id)
        # swap the child runner on this instance for a mock so the test only
        # observes whether it gets invoked
        recurrent_report._run_child_report = MagicMock()
        recurrent_report.task.delay(recurrent_report)
        recurrent_report._run_child_report.assert_called_once_with()
Пример #2
0
 def test_invalid_metric(self):
     """
     Hand parse_request a NamespaceEdits request whose 'namespaces' value is
     a free-form string instead of a list.

     NOTE(review): nothing is asserted here; presumably an expected-exception
     decorator sits above this method, outside the visible span -- confirm.
     """
     report = RunReport()
     invalid_metric = {
         'name': 'NamespaceEdits',
         'namespaces': 'blah blah',
     }
     request = {
         'name': 'Edits - test',
         'cohort': {
             'id': self.test_cohort_id,
         },
         'metric': invalid_metric,
     }
     report.parse_request([request])
 def test_run_report_finish(self):
     """
     finish() called with a one-element list must return a mapping whose
     entry at the report's result_key is that element.
     """
     report_request = {
         'name': 'Edits - test',
         'cohort': {
             'id': self.cohort.id,
             'name': self.cohort.name,
         },
         'metric': {
             'name': 'NamespaceEdits',
         },
     }
     report = RunReport(report_request, user_id=self.owner_user_id)
     finished = report.finish(['aggregate_result'])
     assert_equals(finished[report.result_key], 'aggregate_result')
 def test_run_report_finish(self):
     """
     Check that finish() exposes the single aggregate result it receives
     under the key given by the report's result_key attribute.
     """
     cohort_options = {
         'id': self.cohort.id,
         'name': self.cohort.name,
     }
     metric_options = {
         'name': 'NamespaceEdits',
     }
     report = RunReport(
         {
             'name': 'Edits - test',
             'cohort': cohort_options,
             'metric': metric_options,
         },
         user_id=self.owner_user_id,
     )
     outcome = report.finish(['aggregate_result'])
     stored_value = outcome[report.result_key]
     assert_equals(stored_value, 'aggregate_result')
Пример #5
0
 def test_aggregated_response_bytes_added(self):
     """
     Run a BytesAdded report with individual results and a summed aggregate,
     then check one individual 'net_sum' and the aggregate
     'positive_only_sum'.
     """
     bytes_added_request = {
         'name': 'Edits - test',
         'cohort': {
             'id': self.test_cohort_id,
         },
         'metric': {
             'name': 'BytesAdded',
             'namespaces': [0, 1, 2],
             'start_date': '2013-06-01',
             'end_date': '2013-09-01',
             'individualResults': True,
             'aggregateResults': True,
             'aggregateSum': True,
             'aggregateAverage': False,
             'aggregateStandardDeviation': False,
         },
     }
     report = RunReport([bytes_added_request], user_id=self.test_user_id)
     task_output = report.task.delay(report).get()

     # the results are keyed by the result_key stored for the first child
     persisted_child = self.session.query(PersistentReport)\
         .filter(PersistentReport.id == report.children[0].persistent_id)\
         .one()
     report_results = task_output[persisted_child.result_key]

     individual = report_results[Aggregation.IND][0]
     assert_equals(individual[self.test_mediawiki_user_id]['net_sum'], 6)

     summed = report_results[Aggregation.SUM]
     assert_equals(summed['positive_only_sum'], 150)
 def test_days_missed_0(self):
     """
     days_missed for report 0 must be exactly the days 1, 2 and 11 days
     before self.today.
     """
     expected_days = set(
         self.today - timedelta(days=days_ago) for days_ago in (1, 2, 11)
     )
     actual_days = RunReport.days_missed(self.reports[0], self.session)
     assert_equals(actual_days, expected_days)
 def test_basic_response(self):
     """
     Run a NamespaceEdits report with individual results only and check the
     edit count reported for self.editor(0).
     """
     cohort_options = {
         'id': self.cohort.id,
         'name': self.cohort.name,
     }
     metric_options = {
         'name': 'NamespaceEdits',
         'namespaces': [0, 1, 2],
         'start_date': '2013-01-01 00:00:00',
         'end_date': '2013-01-02 00:00:00',
         'individualResults': True,
         'aggregateResults': False,
         'aggregateSum': False,
         'aggregateAverage': False,
         'aggregateStandardDeviation': False,
     }
     report = RunReport(
         {
             'name': 'Edits - test',
             'cohort': cohort_options,
             'metric': metric_options,
         },
         user_id=self.owner_user_id,
     )
     task_output = report.task.delay(report).get()
     self.session.commit()

     # the task output is keyed by the result_key stored on the report row
     stored_report = self.session.query(ReportStore).get(report.persistent_id)
     individual = task_output[stored_report.result_key][Aggregation.IND]
     # TODO: figure out why one of the resulting wiki_user_ids is None here
     assert_equals(individual[self.editor(0)]['edits'], 2)
 def test_empty_response(self):
     """
     A form submitted with no cohorts / metrics should be caught client
     side.  Server side, an exception will be thrown if the RunReport
     object cannot be created.

     NOTE(review): no exception is asserted in the visible body; presumably
     an expected-exception decorator sits above this method -- confirm.
     """
     empty_parameters = {}
     RunReport(empty_parameters, user_id=self.owner_user_id)
    def test_aggregated_response_bytes_added(self):
        """
        Run a BytesAdded report on namespace 0 with individual results and a
        summed aggregate, then check one individual 'net_sum' and the
        aggregate 'positive_only_sum'.
        """
        cohort_options = {
            'id': self.cohort.id,
            'name': self.cohort.name,
        }
        metric_options = {
            'name': 'BytesAdded',
            'namespaces': [0],
            'start_date': '2013-01-01 00:00:00',
            'end_date': '2013-01-03 00:00:00',
            'individualResults': True,
            'aggregateResults': True,
            'aggregateSum': True,
            'aggregateAverage': False,
            'aggregateStandardDeviation': False,
        }
        report = RunReport(
            {
                'name': 'Edits - test',
                'cohort': cohort_options,
                'metric': metric_options,
            },
            user_id=self.owner_user_id,
        )
        task_output = report.task.delay(report).get()
        self.session.commit()

        # the task output is keyed by the result_key stored on the report row
        stored_report = self.session.query(ReportStore).get(report.persistent_id)
        report_results = task_output[stored_report.result_key]

        individual = report_results[Aggregation.IND]
        assert_equals(individual[self.editor(0)]['net_sum'], -90)

        summed = report_results[Aggregation.SUM]
        assert_equals(summed['positive_only_sum'], 140)
    def inject_and_fetch_recurrent_run(self):
        """
        Helper: run a recurrent NamespaceEdits report, invoke
        recurring_reports() directly, and return every ReportStore row whose
        recurrent_parent_id is the report that was just run.
        """
        cohort_options = {
            'id': self.cohort.id,
            'name': self.cohort.name,
        }
        metric_options = {
            'name': 'NamespaceEdits',
            'namespaces': [0, 1, 2],
            'start_date': '2013-01-01 00:00:00',
            'end_date': '2013-01-03 00:00:00',
            'individualResults': True,
            'aggregateResults': False,
            'aggregateSum': False,
            'aggregateAverage': False,
            'aggregateStandardDeviation': False,
        }
        parent_report = RunReport(
            {
                'name': 'Edits - test',
                'cohort': cohort_options,
                'metric': metric_options,
                'recurrent': True,
            },
            user_id=self.owner_user_id,
        )
        parent_report.task.delay(parent_report).get()
        self.session.commit()

        # executing directly the code that will be run by the scheduler
        recurring_reports()

        children_of_parent = self.session.query(ReportStore).filter(
            ReportStore.recurrent_parent_id == parent_report.persistent_id
        )
        return children_of_parent.all()
    def test_raises_invalid_cohort_for_any_metric(self):
        """
        With the cohort marked as not validated, constructing a RunReport
        must raise InvalidCohort for every metric that is shown in the UI.
        """
        self.cohort.validated = False
        self.session.commit()

        # .items() iterates the mapping on both Python 2 and Python 3,
        # whereas .iteritems() exists only on Python 2
        for name, metric in metric_classes.items():
            if not metric.show_in_ui:
                continue

            parameters = {
                'name': '{0} - test'.format(name),
                'cohort': {
                    'id': self.cohort.id,
                    'name': self.cohort.name,
                },
                'metric': {
                    'name': name,
                    'namespaces': [0, 1, 2],
                    'start_date': '2013-01-01 00:00:00',
                    'end_date': '2013-01-02 00:00:00',
                    'individualResults': True,
                    'aggregateResults': False,
                    'aggregateSum': False,
                    'aggregateAverage': False,
                    'aggregateStandardDeviation': False,
                },
            }
            try:
                RunReport(parameters, user_id=self.owner_user_id)
            except InvalidCohort:
                continue
            # reaching this point means no InvalidCohort was raised; name the
            # offending metric so the failure is diagnosable
            assert_true(
                False,
                'InvalidCohort was not raised for metric {0}'.format(name),
            )
Пример #12
0
 def test_basic_response(self):
     """
     Run a NamespaceEdits report with individual results only and check the
     edit count reported for self.test_mediawiki_user_id.
     """
     namespace_edits_request = {
         'name': 'Edits - test',
         'cohort': {
             'id': self.test_cohort_id,
         },
         'metric': {
             'name': 'NamespaceEdits',
             'namespaces': [0, 1, 2],
             'start_date': '2013-06-01',
             'end_date': '2013-09-01',
             'individualResults': True,
             'aggregateResults': False,
             'aggregateSum': False,
             'aggregateAverage': False,
             'aggregateStandardDeviation': False,
         },
     }
     report = RunReport([namespace_edits_request], user_id=self.test_user_id)
     task_output = report.task.delay(report).get()

     # the results are keyed by the result_key stored for the first child
     persisted_child = self.session.query(PersistentReport)\
         .filter(PersistentReport.id == report.children[0].persistent_id)\
         .one()
     individual = task_output[persisted_child.result_key][Aggregation.IND][0]
     # TODO: figure out why one of the resulting wiki_user_ids is None here
     assert_equals(individual[self.test_mediawiki_user_id]['edits'], 2)
    def test_user_id_assigned_properly(self):
        """
        After running a BytesAdded report and then recurring_reports(), no
        ReportStore row may be left with a NULL user_id.
        """
        cohort_options = {
            'id': self.cohort.id,
            'name': self.cohort.name,
        }
        metric_options = {
            'name': 'BytesAdded',
            'namespaces': '0,1,2',
            'start_date': '2013-01-01 00:00:00',
            'end_date': '2013-01-02 00:00:00',
            'individualResults': True,
            'aggregateResults': True,
            'aggregateSum': True,
            'aggregateAverage': False,
            'aggregateStandardDeviation': False,
        }
        report = RunReport(
            {
                'name': 'Bytes - test',
                'cohort': cohort_options,
                'metric': metric_options,
            },
            user_id=self.owner_user_id,
        )
        report.task.delay(report).get()
        self.session.commit()
        # executing directly the code that will be run by the scheduler
        recurring_reports()

        # make sure all report nodes have a user_id; the `== None` comparison
        # is deliberate, it is a column expression and not a Python identity
        # test
        orphan_count_query = self.session.query(func.count(ReportStore)) \
            .filter(ReportStore.user_id == None)
        orphan_count = orphan_count_query.one()[0]
        assert_equals(orphan_count, 0)
 def test_days_missed_2(self):
     """
     After adding runs to report 2, days_missed must return exactly the
     days listed (as days-ago offsets) in self.missed_by_index[2].
     """
     self.add_runs_to_report(2)
     actual_days = RunReport.days_missed(self.reports[2], self.session)
     expected_days = [
         self.today - timedelta(days=days_ago)
         for days_ago in self.missed_by_index[2]
     ]
     assert_equals(set(actual_days), set(expected_days))
 def test_create_reports_for_missed_days_2(self):
     """
     create_reports_for_missed_days for report 2 must yield runs whose
     'created' values are exactly yesterday and the day before.
     """
     missed_runs = RunReport.create_reports_for_missed_days(
         self.reports[2], self.session
     )
     created_days = set([run.created for run in list(missed_runs)])
     expected_days = set(
         self.today - timedelta(days=days_ago) for days_ago in (1, 2)
     )
     assert_equals(created_days, expected_days)
    def test_rerun(self):
        """
        Rerunning a report must produce a dict result and leave the number
        of ReportStore rows unchanged.
        """
        count_before = len(self.session.query(ReportStore).all())

        rerun_output = RunReport.rerun(self.reports[0]).get()
        assert_equals(type(rerun_output), dict)

        count_after = len(self.session.query(ReportStore).all())
        assert_equals(count_before, count_after)
 def test_days_missed_1(self):
     """
     days_missed for report 1 must be exactly the days 1, 2 and 11 days
     before self.today.
     """
     # NOTE: search stops at 30 days, so it doesn't matter that
     # the 31 and 33 days-ago runs were missed
     expected_days = set(
         self.today - timedelta(days=days_ago) for days_ago in (1, 2, 11)
     )
     actual_days = RunReport.days_missed(self.reports[1], self.session)
     assert_equals(actual_days, expected_days)
 def test_create_reports_for_missed_days_2(self):
     """
     With runs added to report 2 and the no_more_than cap passed in, the
     created runs must cover exactly the days in self.missed_by_index[2].
     """
     self.add_runs_to_report(2)
     missed_runs = RunReport.create_reports_for_missed_days(
         self.reports[2],
         self.session,
         no_more_than=self.no_more_than,
     )
     created_days = set([run.created for run in list(missed_runs)])
     expected_days = set([
         self.today - timedelta(days=days_ago)
         for days_ago in self.missed_by_index[2]
     ])
     assert_equals(created_days, expected_days)
    def test_days_missed_2_with_cleanup(self):
        """
        Runs that were switched back to pending must be reported as missed,
        in addition to the days listed in self.missed_by_index[2].
        """
        self.add_runs_to_report(2)

        # change some reports back to pending to make sure they're cleaned up.
        # An explicit loop is used because map() is lazy on Python 3 and
        # would never call make_pending there.
        for report_run in self.report_runs[0::2]:
            make_pending(report_run)
        self.session.commit()

        missed_days = RunReport.days_missed(self.reports[2], self.session)
        assert_equals(
            set(missed_days),
            set([
                self.today - timedelta(days=m) for m in self.missed_by_index[2]
            ] + [r.created for r in self.report_runs[0::2]]))
 def test_run_report_repr(self):
     """
     The string form of a RunReport must contain the text 'RunReport'.
     """
     report_request = {
         'name': 'Edits - test',
         'cohort': {
             'id': self.cohort.id,
             'name': self.cohort.name,
         },
         'metric': {
             'name': 'NamespaceEdits',
         },
     }
     report = RunReport(report_request, user_id=self.owner_user_id)
     assert_true('RunReport' in str(report))
    def test_lots_of_concurrent_requests(self):
        """
        Queue several identical BytesAdded reports before collecting any
        result, then require every one to report a 'positive_only_sum'
        of 140.
        """
        cohort_options = {
            'id': self.cohort.id,
            'name': self.cohort.name,
        }
        metric_options = {
            'name': 'BytesAdded',
            'namespaces': '0,1,2',
            'start_date': '2013-01-01 00:00:00',
            'end_date': '2013-01-02 00:00:00',
            'individualResults': True,
            'aggregateResults': True,
            'aggregateSum': True,
            'aggregateAverage': False,
            'aggregateStandardDeviation': False,
        }
        shared_request = {
            'name': 'Edits - test',
            'cohort': cohort_options,
            'metric': metric_options,
        }

        # NOTE: you can make this loop as much as you'd like if celery
        # is allowed enough concurrent workers, set via CELERYD_CONCURRENCY
        trials = 3
        queued = []
        for _ in range(trials):
            report = RunReport(shared_request, user_id=self.owner_user_id)
            queued.append((report, report.task.delay(report)))

        successes = 0
        for report, pending_result in queued:
            try:
                task_output = pending_result.get()
                self.session.commit()
                stored_report = self.session.query(ReportStore) \
                    .get(report.persistent_id)
                summed = task_output[stored_report.result_key][Aggregation.SUM]
                if summed['positive_only_sum'] == 140:
                    successes += 1
            except SoftTimeLimitExceeded:
                # a timeout is only reported; the final count check fails the test
                print('Timeout expired during this task.')
            except Exception:
                print('An exception occurred during this task.')
                raise

        print('Successes: {0}'.format(successes))
        assert_true(successes == trials, 'all of the trials must succeed')
def recurring_reports(report_id=None):
    """
    Queue the missed runs of recurrent reports.

    Nothing is scheduled (a warning is logged instead) when
    ReplicationLagService reports that any replica is lagged.  Otherwise each
    recurrent ReportStore row -- or only the row whose id is ``report_id``
    when one is given -- is handed to RunReport.create_reports_for_missed_days
    and every run it yields is queued through its task.

    Parameters
        report_id : optional ReportStore id restricting the run to one report

    Errors raised while querying or scheduling are logged with their
    traceback instead of propagating; a failure on one report does not stop
    the remaining reports from being processed.
    """
    from wikimetrics.configurables import db
    from wikimetrics.models import ReportStore, RunReport

    replication_lag_service = ReplicationLagService()
    if replication_lag_service.is_any_lagged():
        task_logger.warning(
            'Replication lag detected. '
            'Hence, skipping creating new recurring reports.'
        )
        return

    try:
        session = db.get_session()
        # no trailing backslash here: the original continuation only worked
        # because the following line happened to be blank
        query = session.query(ReportStore).filter(ReportStore.recurrent)

        if report_id is not None:
            query = query.filter(ReportStore.id == report_id)

        for report in query.all():
            try:
                task_logger.info('Running recurring report "{0}"'.format(report))
                # only pass the cap along when one is configured, so the
                # callee's own default applies otherwise
                no_more_than = queue.conf.get('MAX_INSTANCES_PER_RECURRENT_REPORT')
                kwargs = dict()
                if no_more_than:
                    kwargs['no_more_than'] = no_more_than

                days_to_run = RunReport.create_reports_for_missed_days(
                    report,
                    session,
                    **kwargs
                )
                for day_to_run in days_to_run:
                    day_to_run.task.delay(day_to_run)

            except Exception:
                task_logger.error('Problem running recurring report "{}": {}'.format(
                    report, traceback.format_exc()
                ))

    except Exception:
        task_logger.error('Problem running recurring reports: {}'.format(
            traceback.format_exc()
        ))
    def test_wiki_cohort_runs(self):
        """
        Point the users of self.cohort at a freshly created wiki cohort, run
        a NamespaceEdits report against it, and check one individual edit
        count plus the summed edits.
        """
        self.create_wiki_cohort()

        # give the WikiCohort all the users of self.cohort, for easy testing
        reassign_wiki_users = WikiUserStore.__table__.update().values(
            validating_cohort=self.basic_wiki_cohort.id
        ).where(
            WikiUserStore.validating_cohort == self.cohort.id
        )
        self.session.execute(reassign_wiki_users)
        reassign_memberships = CohortWikiUserStore.__table__.update().values(
            cohort_id=self.basic_wiki_cohort.id
        ).where(
            CohortWikiUserStore.cohort_id == self.cohort.id
        )
        self.session.execute(reassign_memberships)
        self.cohort = self.basic_wiki_cohort

        cohort_options = {
            'id': self.cohort.id,
            'name': self.cohort.name,
        }
        metric_options = {
            'name': 'NamespaceEdits',
            'namespaces': [0, 1, 2],
            'start_date': '2013-01-01 00:00:00',
            'end_date': '2013-01-02 00:00:00',
            'individualResults': True,
            'aggregateResults': True,
            'aggregateSum': True,
            'aggregateAverage': False,
            'aggregateStandardDeviation': False,
        }
        report = RunReport(
            {
                'name': 'Edits - test',
                'cohort': cohort_options,
                'metric': metric_options,
            },
            user_id=self.owner_user_id,
        )
        task_output = report.task.delay(report).get()
        self.session.commit()

        # the task output is keyed by the result_key stored on the report row
        stored_report = self.session.query(ReportStore).get(report.persistent_id)
        report_results = task_output[stored_report.result_key]

        individual = report_results[Aggregation.IND]
        assert_equals(individual[self.editor(0)]['edits'], 2)

        summed = report_results[Aggregation.SUM]
        assert_equals(summed['edits'], 4)
    def test_aggregated_response_namespace_edits_with_timeseries(self):
        """
        Run a monthly-timeseries NamespaceEdits report and check that both
        the individual and the summed 'edits' series start at the requested
        start date, and that the summed values per period are as expected.
        """
        parameters = {
            'name': 'Edits - test',
            'cohort': {
                'id': self.cohort.id,
                'name': self.cohort.name,
            },
            'metric': {
                'name': 'NamespaceEdits',
                'namespaces': [0, 1, 2],
                'start_date': '2013-01-01 00:20:00',
                'end_date': '2013-03-01 00:00:00',
                'timeseries': TimeseriesChoices.MONTH,
                'individualResults': True,
                'aggregateResults': True,
                'aggregateSum': True,
                'aggregateAverage': False,
                'aggregateStandardDeviation': False,
            },
        }
        jr = RunReport(parameters, user_id=self.owner_user_id)
        results = jr.task.delay(jr).get()
        self.session.commit()
        result_key = self.session.query(ReportStore) \
            .get(jr.persistent_id) \
            .result_key
        results = results[result_key]

        # list(mapping)[0] yields the first key on both Python 2 and 3, while
        # .items()[0] only works where items() returns a list.
        # NOTE(review): this relies on the series being an ordered mapping --
        # confirm against the timeseries result type.
        individual_edits = results[Aggregation.IND][self.editor(0)]['edits']
        assert_equals(list(individual_edits)[0], '2013-01-01 00:20:00')

        summed_edits = results[Aggregation.SUM]['edits']
        assert_equals(list(summed_edits)[0], '2013-01-01 00:20:00')

        assert_equals(summed_edits['2013-01-01 00:20:00'], 8)
        assert_equals(summed_edits['2013-02-01 00:00:00'], 2)
    def test_many_parallel_runs(self):
        """
        Create a recurrent report dated self.total_runs days in the past,
        invoke recurring_reports(), and require exactly self.total_runs
        successful child runs.
        """
        parameters = {
            'name': 'Edits - test',
            'cohort': {
                'id': self.cohort.id,
                'name': self.cohort.name,
            },
            'metric': {
                'name': 'NamespaceEdits',
                'namespaces': [0, 1, 2],
                'start_date': '2013-01-01 00:00:00',
                'end_date': '2013-01-03 00:00:00',
                'individualResults': True,
                'aggregateResults': False,
                'aggregateSum': False,
                'aggregateAverage': False,
                'aggregateStandardDeviation': False,
            },
            'recurrent': True,
        }

        jr = RunReport(parameters,
                       user_id=self.owner_user_id,
                       created=datetime.today() -
                       timedelta(days=self.total_runs))
        jr.task.delay(jr).get()
        self.session.commit()

        # executing directly the code that will be run by the scheduler
        recurring_reports()

        recurrent_runs = self.session.query(ReportStore) \
            .filter(ReportStore.recurrent_parent_id == jr.persistent_id) \
            .all()

        # a list comprehension rather than filter(): filter() returns an
        # iterator on Python 3, which has no len()
        successful_runs = [
            run for run in recurrent_runs if run.status == 'SUCCESS'
        ]

        # make sure there is one successful run per expected day
        assert_equal(len(successful_runs), self.total_runs)
Пример #26
0
def recurring_reports():
    """
    Queue the missed runs of every recurrent PersistentReport.

    Each recurrent report is handed to
    RunReport.create_reports_for_missed_days and every run it yields is
    queued through its task.  Errors are logged instead of propagating; a
    failure on one report does not stop the remaining reports.
    """
    from wikimetrics.configurables import db
    from wikimetrics.models import PersistentReport, RunReport

    try:
        session = db.get_session()
        recurrent_reports = session.query(PersistentReport) \
            .filter(PersistentReport.recurrent) \
            .all()

        for report in recurrent_reports:
            try:
                task_logger.info('Running recurring report "{0}"'.format(report))
                days_to_run = RunReport.create_reports_for_missed_days(report, session)
                for day_to_run in days_to_run:
                    day_to_run.task.delay(day_to_run)
            # `except ... as e` is valid on Python 2.6+ and Python 3; the
            # comma form is a syntax error on Python 3
            except Exception as e:
                task_logger.error('Problem running recurring report "{0}": {1}'.format(
                    report, e
                ))
    except Exception as e:
        task_logger.error('Problem running recurring reports: {0}'.format(e))
    def test_invalid_metric(self):
        """
        A NamespaceEdits report with an unparseable start_date must produce
        a 'FAILURE' entry mentioning that Edits was incorrectly configured.
        """
        jr = RunReport(
            {
                'name': 'Edits - test',
                'cohort': {
                    'id': self.cohort.id,
                },
                'metric': {
                    'name': 'NamespaceEdits',
                    'start_date': 'blah',
                },
            },
            user_id=self.owner_user_id)

        results = jr.task.delay(jr).get()
        # call form of print: same output on Python 2 for a single argument,
        # and not a syntax error on Python 3
        print(results)
        self.session.commit()
        result_key = self.session.query(ReportStore) \
            .get(jr.persistent_id) \
            .result_key
        assert_true(
            results[result_key]['FAILURE'].find(
                'Edits was incorrectly configured') >= 0)
Пример #28
0
def recurring_reports(report_id=None):
    """
    Queue the missed runs of recurrent reports.

    Each recurrent ReportStore row -- or only the row whose id is
    ``report_id`` when one is given -- is handed to
    RunReport.create_reports_for_missed_days and every run it yields is
    queued through its task.  Errors are logged instead of propagating; a
    failure on one report does not stop the remaining reports.

    Parameters
        report_id : optional ReportStore id restricting the run to one report
    """
    from wikimetrics.configurables import db
    from wikimetrics.models import ReportStore, RunReport

    try:
        session = db.get_session()
        # no trailing backslash here: the original continuation only worked
        # because the following line happened to be blank
        query = session.query(ReportStore).filter(ReportStore.recurrent)

        if report_id is not None:
            query = query.filter(ReportStore.id == report_id)

        for report in query.all():
            try:
                task_logger.info('Running recurring report "{0}"'.format(report))
                days_to_run = RunReport.create_reports_for_missed_days(report, session)
                for day_to_run in days_to_run:
                    day_to_run.task.delay(day_to_run)
            # `except ... as e` is valid on Python 2.6+ and Python 3; the
            # comma form is a syntax error on Python 3
            except Exception as e:
                task_logger.error('Problem running recurring report "{0}": {1}'.format(
                    report, e
                ))
    except Exception as e:
        task_logger.error('Problem running recurring reports: {0}'.format(e))
Пример #29
0
def reports_request():
    """
    Renders a page that facilitates kicking off a new report.

    On GET the 'report.html' template is rendered.  On any other method the
    JSON in the 'responses' form field is parsed, a RunReport is created and
    queued for each entry, and a redirect to 'reports_index' is returned.
    """
    if request.method == 'GET':
        return render_template('report.html')

    requested_reports = json.loads(request.form['responses'])
    is_recurrent = json.loads(request.form.get('recurrent', 'false'))

    for report_parameters in requested_reports:
        report_parameters['recurrent'] = is_recurrent
        # NOTE: this is not a mistake.  Currently recurrent => public on creation
        report_parameters['public'] = is_recurrent

        # Encode cohort description for the case it contains special characters
        cohort = report_parameters['cohort']
        if 'description' in cohort and cohort['description'] is not None:
            cohort['description'] = cohort['description'].encode('utf-8')

        new_report = RunReport(report_parameters, user_id=current_user.id)
        new_report.task.delay(new_report)

    return json_redirect(url_for('reports_index'))
Пример #30
0
def rerun_report(report_id):
    """
    Look up the ReportStore row with id ``report_id``, hand it to
    RunReport.rerun, and answer with a JSON confirmation message.

    NOTE(review): the lookup result is passed on unchecked; confirm what
    happens when no report has this id.
    """
    report_to_rerun = db.get_session().query(ReportStore).get(report_id)
    RunReport.rerun(report_to_rerun)
    return json_response(message='Report scheduled for rerun')
Пример #31
0
 def test_empty_response(self):
     """
     Running a RunReport built from an empty request list must produce an
     empty list.
     """
     # TODO: handle case where user tries to submit form with no cohorts / metrics
     empty_report = RunReport([], user_id=self.test_user_id)
     task_output = empty_report.task.delay(empty_report).get()
     assert_equals(task_output, [])
Пример #32
0
 def test_run_report_finish(self):
     """
     finish() on a RunReport with no children must report 'Finished' under
     the report's result_key.
     """
     empty_report = RunReport([])
     outcome = empty_report.finish([])
     stored_value = outcome[empty_report.result_key]
     assert_equals(stored_value, 'Finished')
Пример #33
0
 def test_run_report_repr(self):
     """
     The string form of a RunReport must contain the text 'RunReport'.
     """
     empty_report = RunReport([])
     assert_true('RunReport' in str(empty_report))
 def test_create_reports_for_missed_days_3(self):
     """
     With runs added to report 3, the number of runs created must equal the
     no_more_than cap that was passed in.
     """
     self.add_runs_to_report(3)
     missed_runs = RunReport.create_reports_for_missed_days(
         self.reports[3],
         self.session,
         no_more_than=self.no_more_than,
     )
     created_count = len(list(missed_runs))
     assert_equals(created_count, self.no_more_than)
 def test_invalid_report(self):
     """
     Construct a RunReport from an empty parameters dict.

     NOTE(review): nothing is asserted here; presumably an expected-exception
     decorator sits above this method, outside the visible span -- confirm.
     """
     empty_parameters = {}
     RunReport(empty_parameters)