def test_answer_distribution(self):
     """Run AnswerDistributionOneFilePerCourseTask, then validate its output."""
     options = [
         ('--src', as_list_param(self.test_src)),
         ('--dest', url_path_join(self.test_root, 'dst')),
         ('--name', 'test'),
         ('--output-root', self.test_out),
         ('--include', as_list_param('"*"')),
         ('--manifest', url_path_join(self.test_root, 'manifest.txt')),
         ('--base-input-format', self.input_format),
         ('--lib-jar', as_list_param(self.oddjob_jar)),
         ('--n-reduce-tasks', str(self.NUM_REDUCERS)),
     ]

     # Flatten the (flag, value) pairs behind the task name.
     launch_args = ['AnswerDistributionOneFilePerCourseTask']
     for flag, value in options:
         launch_args.extend((flag, value))

     self.task.launch(launch_args)
     self.validate_output()
    def test_answer_distribution_mysql(self):
        """Run AnswerDistributionToMySQLTaskWorkflow, then validate its output."""
        source_param = as_list_param(self.test_src)
        destination_url = url_path_join(self.test_root, 'dst')
        include_param = as_list_param('"*"')
        manifest_url = url_path_join(self.test_root, 'manifest.txt')
        lib_jar_param = as_list_param(self.oddjob_jar)
        reducer_count = str(self.NUM_REDUCERS)

        launch_args = [
            'AnswerDistributionToMySQLTaskWorkflow',
            '--src', source_param,
            '--dest', destination_url,
            '--name', 'test',
            '--include', include_param,
            '--manifest', manifest_url,
            '--base-input-format', self.input_format,
            '--lib-jar', lib_jar_param,
            '--n-reduce-tasks', reducer_count,
            '--credentials', self.export_db.credentials_file_url,
        ]
        self.task.launch(launch_args)

        self.validate_output()
    def test_event_log_exports_using_manifest(self):
        """Run EventExportTask for the 'edx' and 'edge' environments, then validate.

        Every launch receives the same config override, which sets the
        manifest threshold to 1.
        """
        manifest_override = {'manifest': {'threshold': 1}}

        # (environment, required path text) pairs, in launch order.
        environments = (
            ('edx', self.PROD_FOLDER),
            ('edge', self.EDGE_FOLDER),
        )

        for env_name, required_text in environments:
            launch_args = [
                'EventExportTask',
                '--source', as_list_param(url_path_join(self.test_src, env_name)),
                '--output-root', self.test_out,
                '--config', self.test_config,
                '--environment', env_name,
                '--interval', '2014-05',
                '--gpg-key-dir', self.test_gpg_key_dir,
                '--gpg-master-key', '*****@*****.**',
                '--required-path-text', required_text,
                '--n-reduce-tasks', str(self.NUM_REDUCERS),
            ]
            self.task.launch(launch_args, manifest_override)

        self.validate_output()
    def run_and_check(self, interval_type):
        """Run StudentEngagementToMysqlTask and check the forum counts it stored.

        Args:
            interval_type: 'weekly' or 'daily'. Selects both the table that is
                queried (``student_engagement_<interval_type>``) and the end
                date expected on every returned row.

        Raises:
            AssertionError: if ``interval_type`` is not one of the supported
                values, or if the rows read back differ from the expected ones.
        """
        expected_end_dates = {
            'weekly': datetime.date(2015, 9, 15),
            'daily': datetime.date(2015, 9, 14),
        }

        # Fail fast, before the task launch and the query. An explicit raise is
        # used instead of `assert False` because assert statements are removed
        # under `python -O`, which would leave the expected date unbound.
        if interval_type not in expected_end_dates:
            raise AssertionError("Invalid interval type: {}".format(interval_type))
        end_date_expected = expected_end_dates[interval_type]

        self.task.launch([
            'StudentEngagementToMysqlTask',
            '--source', as_list_param(self.test_src),
            '--credentials', self.export_db.credentials_file_url,
            '--n-reduce-tasks', str(self.NUM_REDUCERS),
            '--interval', '2015-09-01-2015-09-16',
            '--interval-type', interval_type,
        ])

        # interval_type is interpolated into the table name; it has already
        # been checked against the supported values above.
        with self.export_db.cursor() as cursor:
            cursor.execute(
                'SELECT end_date, course_id, username, '
                'forum_posts, forum_responses, forum_comments, '
                'forum_upvotes_given, forum_upvotes_received '
                'FROM student_engagement_{interval_type} WHERE course_id="{course_id}" '
                'ORDER BY end_date, username;'
                .format(course_id=self.COURSE_ID, interval_type=interval_type)
            )
            results = cursor.fetchall()

        self.assertItemsEqual(results, [
            (end_date_expected, self.COURSE_ID, 'audit', 1, 0, 0, 3, 1),
            (end_date_expected, self.COURSE_ID, 'honor', 1, 1, 0, 0, 2),
            (end_date_expected, self.COURSE_ID, 'staff', 2, 0, 0, 1, 2),
            (end_date_expected, self.COURSE_ID, 'verified', 0, 0, 1, 1, 0),
        ])
    def test_location_by_course(self):
        """Run InsertToMysqlLastCountryPerCourseTask and check the rows it stored.

        Uploads the tracking log, loads the SQL fixtures, launches the task and
        compares columns 1 through 5 of every row in
        ``course_enrollment_location_current`` with the expected tuples.
        """
        self.upload_tracking_log(self.INPUT_FILE, self.START_DATE)

        for sql_fixture in self.SQL_FIXTURES:
            self.execute_sql_fixture_file(sql_fixture)

        launch_args = [
            'InsertToMysqlLastCountryPerCourseTask',
            '--source', as_list_param(self.test_src),
            '--interval', self.DATE_INTERVAL.to_string(),
            '--n-reduce-tasks', str(self.NUM_REDUCERS),
        ]
        self.task.launch(launch_args)

        self.maxDiff = None

        query = 'SELECT * FROM course_enrollment_location_current ORDER BY country_code, course_id'
        with self.export_db.cursor() as cursor:
            cursor.execute(query)
            stored_rows = cursor.fetchall()

        # TODO: what happens if the test starts near the UTC day boundary. The task sees that today is day "X", yet this
        # code sees the following day since the day boundary was crossed between then and now.
        today = datetime.utcnow().date()

        observed = [stored_row[1:6] for stored_row in stored_rows]

        # Every expected row starts with today's date; only the tail varies.
        expected_tails = (
            (self.COURSE_ID, None, 1, 1),
            (self.COURSE_ID, 'UNKNOWN', 0, 1),
            (self.COURSE_ID, 'IE', 1, 1),
            (self.COURSE_ID2, 'TH', 1, 1),
            (self.COURSE_ID, 'TH', 1, 1),
        )
        expected = [(today,) + tail for tail in expected_tails]

        self.assertItemsEqual(observed, expected)
    def test_event_log_exports_using_manifest(self):
        """Run EventExportTask for the 'prod' and 'edge' environments, then validate.

        The same config override (manifest threshold of 1) is passed to each launch.
        """
        override = {'manifest': {'threshold': 1}}
        required_text = {'prod': self.PROD_FOLDER, 'edge': self.EDGE_FOLDER}

        def build_args(env):
            """Return the EventExportTask argument list for one environment."""
            return [
                'EventExportTask',
                '--source', as_list_param(url_path_join(self.test_src, env)),
                '--output-root', self.test_out,
                '--config', self.test_config,
                '--environment', env,
                '--interval', '2014-05',
                '--gpg-key-dir', self.test_gpg_key_dir,
                '--gpg-master-key', '*****@*****.**',
                '--required-path-text', required_text[env],
                '--n-reduce-tasks', str(self.NUM_REDUCERS),
            ]

        for env in ('prod', 'edge'):
            self.task.launch(build_args(env), override)

        self.validate_output()
    def launch_task(self, output_root, extra_source=None, run_with_validation_events=True):
        """Run the enrollment validation workflow.

        Args:
            output_root: value passed to the task as ``--output-root``.
            extra_source: optional additional source. When given, both
                ``self.test_src`` and this value are passed as ``--source``;
                otherwise only ``self.test_src`` is passed and
                ``--generate-before`` is added.
            run_with_validation_events: when true, the wider date interval is
                used and ``--expected-validation`` is set to
                ``"<END_DATE>T00"``.
        """

        # Widen the interval to include the latest validation events.
        interval = self.WIDER_DATE_INTERVAL if run_with_validation_events else self.DATE_INTERVAL
        source_pattern = '[\\".*?.log-.*.gz\\"]'
        # Raw string: `\d` and `\.` are regex escapes, not Python string
        # escapes, and are invalid escape sequences in a plain literal.
        validation_pattern = r'".*?enroll_validated_\d{8}\.log\.gz"'
        launch_args = [
            'EnrollmentValidationWorkflow',
            '--interval', interval,
            '--validation-root', self.test_validate,
            '--validation-pattern', validation_pattern,
            '--credentials', self.import_db.credentials_file_url,
            '--n-reduce-tasks', str(self.NUM_REDUCERS),
            '--pattern', source_pattern,
            '--output-root', output_root,
        ]
        # An extra source means we're using synthetic events, so we
        # don't want to generate outside the interval in that case.
        if extra_source:
            launch_args.extend(['--source', '[\\"{}\\",\\"{}\\"]'.format(self.test_src, extra_source)])
        else:
            launch_args.extend(['--source', as_list_param(self.test_src)])
            launch_args.extend(['--generate-before'])
        if run_with_validation_events:
            launch_args.extend(['--expected-validation', "{}T00".format(self.END_DATE)])

        self.task.launch(launch_args)
    def test_user_activity(self):
        """Run InsertToMysqlCourseActivityTask and check the stored activity rows."""
        self.maxDiff = None
        self.upload_tracking_log(self.INPUT_FILE, self.END_DATE)

        launch_args = [
            'InsertToMysqlCourseActivityTask',
            '--source', as_list_param(self.test_src),
            '--end-date', self.END_DATE.isoformat(),
            '--weeks', str(self.NUM_WEEKS),
            '--credentials', self.export_db.credentials_file_url,
            '--overwrite-n-days', '43',
            '--n-reduce-tasks', str(self.NUM_REDUCERS),
            '--overwrite-mysql',
        ]
        self.task.launch(launch_args)

        query = (
            'SELECT course_id, interval_start, interval_end, label, count '
            'FROM course_activity '
            'ORDER BY course_id, interval_end, label'
        )
        with self.export_db.cursor() as cursor:
            cursor.execute(query)
            stored_rows = cursor.fetchall()

        # Interval boundaries shared by the expected rows.
        may_19 = datetime.datetime(2014, 5, 19, 0, 0)
        may_26 = datetime.datetime(2014, 5, 26, 0, 0)
        jun_09 = datetime.datetime(2014, 6, 9, 0, 0)
        jun_16 = datetime.datetime(2014, 6, 16, 0, 0)
        jun_23 = datetime.datetime(2014, 6, 23, 0, 0)

        expected_rows = [
            (self.COURSE_ID2, may_19, may_26, 'ACTIVE', 1),
            (self.COURSE_ID2, may_19, may_26, 'PLAYED_VIDEO', 1),
            (self.COURSE_ID2, jun_16, jun_23, 'ACTIVE', 4),
            (self.COURSE_ID2, jun_16, jun_23, 'ATTEMPTED_PROBLEM', 1),
            (self.COURSE_ID2, jun_16, jun_23, 'PLAYED_VIDEO', 3),
            (self.COURSE_ID, jun_09, jun_16, 'ACTIVE', 1),
            (self.COURSE_ID, jun_09, jun_16, 'PLAYED_VIDEO', 1),
            (self.COURSE_ID, jun_16, jun_23, 'ACTIVE', 4),
            (self.COURSE_ID, jun_16, jun_23, 'ATTEMPTED_PROBLEM', 2),
            (self.COURSE_ID, jun_16, jun_23, 'PLAYED_VIDEO', 3),
        ]
        self.assertItemsEqual(list(stored_rows), expected_rows)
 def run_task(self, interval_type):
     """Launch StudentEngagementCsvFileTask for the given interval type.

     The output root is the result of joining ``self.test_out`` with
     ``interval_type``.
     """
     output_root = url_path_join(self.test_out, interval_type)
     launch_args = [
         'StudentEngagementCsvFileTask',
         '--source', as_list_param(self.test_src),
         '--output-root', output_root,
         '--n-reduce-tasks', str(self.NUM_REDUCERS),
         '--interval', self.interval,
         '--interval-type', interval_type,
     ]
     self.task.launch(launch_args)
    def test_database_import(self):
        """Run ImportMysqlToVerticaTask with one excluded field, then validate."""
        excluded_fields = as_list_param('.*\\.field_to_exclude$')
        launch_args = ['ImportMysqlToVerticaTask']
        launch_args += ['--date', self.DATE]
        launch_args += ['--marker-schema', 'acceptance_marker']
        launch_args += ['--exclude-field', excluded_fields]
        launch_args += ['--overwrite']
        self.task.launch(launch_args)

        self.validate_output()
Exemplo n.º 11
0
 def run_obfuscated_package_task(self):
     """Launch ObfuscatedPackageTask for ``self.filename_safe_course_id``."""
     obfuscated_root = url_path_join(self.test_root, 'obfuscated-output')
     recipients = as_list_param('*****@*****.**')
     launch_args = [
         'ObfuscatedPackageTask',
         '--course', self.filename_safe_course_id,
         '--obfuscated-output-root', obfuscated_root,
         '--gpg-key-dir', self.test_gpg_key_dir,
         '--gpg-master-key', '*****@*****.**',
         '--output-root', self.test_out,
         '--recipient', recipients,
         '--format-version', self.FORMAT_VERSION,
     ]
     self.task.launch(launch_args)
Exemplo n.º 12
0
 def run_obfuscated_package_task(self):
     """Build the ObfuscatedPackageTask arguments and launch the task."""
     options = (
         ('--course', self.filename_safe_course_id),
         ('--obfuscated-output-root', url_path_join(self.test_root, 'obfuscated-output')),
         ('--gpg-key-dir', self.test_gpg_key_dir),
         ('--gpg-master-key', '*****@*****.**'),
         ('--output-root', self.test_out),
         ('--recipient', as_list_param('*****@*****.**')),
         ('--format-version', self.FORMAT_VERSION),
     )

     # Task name first, then each flag immediately followed by its value.
     launch_args = ['ObfuscatedPackageTask']
     for flag, value in options:
         launch_args.append(flag)
         launch_args.append(value)

     self.task.launch(launch_args)
    def test_database_import(self):
        """Launch ImportMysqlToVerticaTask in overwrite mode and validate the output."""
        task_name = 'ImportMysqlToVerticaTask'
        exclude_pattern = as_list_param('.*\\.field_to_exclude$')
        launch_args = [
            task_name,
            '--date', self.DATE,
            '--marker-schema', 'acceptance_marker',
            '--exclude-field', exclude_pattern,
            '--overwrite',
        ]
        self.task.launch(launch_args)

        self.validate_output()
Exemplo n.º 14
0
    def test_base(self):
        """Run TagsDistributionWorkflow on the uploaded log, then call validate_base()."""
        log_date = datetime.date(2015, 8, 1)
        self.upload_tracking_log(self.INPUT_FILE, log_date)
        self.execute_sql_fixture_file('load_auth_userprofile.sql')

        output_root = url_path_join(self.test_out, 'tags_dist_acceptance', '')
        launch_args = [
            'TagsDistributionWorkflow',
            '--source', as_list_param(self.test_src),
            '--interval', '2010-01-01-2020-01-01',
            '--n-reduce-tasks', str(self.NUM_REDUCERS),
            '--output-root', output_root,
            '--database', self.export_db.database_name,
        ]
        self.task.launch(launch_args)

        self.validate_base()
    def test_base(self):
        """Upload the tracking log, load user profiles, run the tags workflow and validate."""
        self.upload_tracking_log(self.INPUT_FILE, datetime.date(2015, 8, 1))
        self.execute_sql_fixture_file('load_auth_userprofile.sql')

        options = [
            ('--source', as_list_param(self.test_src)),
            ('--interval', '2010-01-01-2020-01-01'),
            ('--n-reduce-tasks', str(self.NUM_REDUCERS)),
            ('--output-root', url_path_join(self.test_out, 'tags_dist_acceptance', '')),
            ('--database', self.export_db.database_name),
        ]
        launch_args = ['TagsDistributionWorkflow']
        for flag, value in options:
            launch_args.extend([flag, value])
        self.task.launch(launch_args)

        self.validate_base()
 def run_task(self, interval_type):
     """Run the CSV-generating StudentEngagementCsvFileTask for one interval type."""
     options = (
         ('--source', as_list_param(self.test_src)),
         ('--output-root', url_path_join(self.test_out, interval_type)),
         ('--n-reduce-tasks', str(self.NUM_REDUCERS)),
         ('--interval', self.interval),
         ('--interval-type', interval_type),
     )
     launch_args = ['StudentEngagementCsvFileTask']
     for flag, value in options:
         launch_args += [flag, value]
     self.task.launch(launch_args)
Exemplo n.º 17
0
    def test_user_activity(self):
        """Populate course_activity via InsertToMysqlCourseActivityTask and verify its rows."""
        self.maxDiff = None
        self.upload_tracking_log(self.INPUT_FILE, self.END_DATE)

        self.task.launch([
            'InsertToMysqlCourseActivityTask',
            '--source', as_list_param(self.test_src),
            '--end-date', self.END_DATE.isoformat(),
            '--weeks', str(self.NUM_WEEKS),
            '--credentials', self.export_db.credentials_file_url,
            '--overwrite-n-days', '43',
            '--n-reduce-tasks', str(self.NUM_REDUCERS),
            '--overwrite-mysql',
        ])

        with self.export_db.cursor() as cursor:
            cursor.execute(
                'SELECT course_id, interval_start, interval_end, label, count'
                ' FROM course_activity'
                ' ORDER BY course_id, interval_end, label'
            )
            fetched = cursor.fetchall()

        # (course, interval start, interval end, ((label, count), ...)) groups.
        expected_groups = (
            (self.COURSE_ID2, (2014, 5, 19), (2014, 5, 26),
             (('ACTIVE', 1), ('PLAYED_VIDEO', 1))),
            (self.COURSE_ID2, (2014, 6, 16), (2014, 6, 23),
             (('ACTIVE', 4), ('ATTEMPTED_PROBLEM', 1), ('PLAYED_VIDEO', 3))),
            (self.COURSE_ID, (2014, 6, 9), (2014, 6, 16),
             (('ACTIVE', 1), ('PLAYED_VIDEO', 1))),
            (self.COURSE_ID, (2014, 6, 16), (2014, 6, 23),
             (('ACTIVE', 4), ('ATTEMPTED_PROBLEM', 2), ('PLAYED_VIDEO', 3))),
        )

        expected = []
        for course_id, start, end, label_counts in expected_groups:
            # Omitting the time fields yields midnight.
            interval_start = datetime.datetime(*start)
            interval_end = datetime.datetime(*end)
            for label, count in label_counts:
                expected.append((course_id, interval_start, interval_end, label, count))

        self.assertItemsEqual(list(fetched), expected)
    def launch_task(self,
                    output_root,
                    extra_source=None,
                    run_with_validation_events=True):
        """Run the enrollment validation workflow.

        Args:
            output_root: value passed to the task as ``--output-root``.
            extra_source: optional second source. If given, ``--source`` lists
                both ``self.test_src`` and this value; if not, ``--source`` is
                ``self.test_src`` alone and ``--generate-before`` is appended.
            run_with_validation_events: if true, ``self.WIDER_DATE_INTERVAL``
                is used instead of ``self.DATE_INTERVAL`` and
                ``--expected-validation "<END_DATE>T00"`` is appended.
        """

        # Widen the interval to include the latest validation events.
        interval = self.WIDER_DATE_INTERVAL if run_with_validation_events else self.DATE_INTERVAL
        source_pattern = '[\\".*?.log-.*.gz\\"]'
        # A raw literal keeps the regex backslashes (`\d`, `\.`) verbatim
        # without relying on Python tolerating invalid escape sequences.
        validation_pattern = r'".*?enroll_validated_\d{8}\.log\.gz"'
        launch_args = [
            'EnrollmentValidationWorkflow',
            '--interval',
            interval,
            '--validation-root',
            self.test_validate,
            '--validation-pattern',
            validation_pattern,
            '--credentials',
            self.import_db.credentials_file_url,
            '--n-reduce-tasks',
            str(self.NUM_REDUCERS),
            '--pattern',
            source_pattern,
            '--output-root',
            output_root,
        ]
        # An extra source means we're using synthetic events, so we
        # don't want to generate outside the interval in that case.
        if extra_source:
            launch_args.extend([
                '--source',
                '[\\"{}\\",\\"{}\\"]'.format(self.test_src, extra_source)
            ])
        else:
            launch_args.extend(['--source', as_list_param(self.test_src)])
            launch_args.extend(['--generate-before'])
        if run_with_validation_events:
            launch_args.extend(
                ['--expected-validation', "{}T00".format(self.END_DATE)])

        self.task.launch(launch_args)
    def run_and_check(self, interval_type):
        """Launch StudentEngagementToMysqlTask and verify the rows for ``self.COURSE_ID``.

        Args:
            interval_type: either 'weekly' or 'daily'. It is passed to the task
                as ``--interval-type``, forms the suffix of the queried table
                name, and determines the expected ``end_date`` value.

        Raises:
            AssertionError: when ``interval_type`` is unsupported or the
                queried rows do not match the expected ones.
        """
        # Reject unsupported values before doing any work. The original check
        # ran after the launch and query and used `assert False`, which
        # `python -O` strips, leaving the expected end date undefined.
        if interval_type == 'weekly':
            end_date_expected = datetime.date(2015, 9, 15)
        elif interval_type == 'daily':
            end_date_expected = datetime.date(2015, 9, 14)
        else:
            raise AssertionError(
                "Invalid interval type: {}".format(interval_type))

        self.task.launch([
            'StudentEngagementToMysqlTask',
            '--source',
            as_list_param(self.test_src),
            '--credentials',
            self.export_db.credentials_file_url,
            '--n-reduce-tasks',
            str(self.NUM_REDUCERS),
            '--interval',
            '2015-09-01-2015-09-16',
            '--interval-type',
            interval_type,
        ])

        # The table name embeds interval_type, already restricted to the two
        # supported values above.
        with self.export_db.cursor() as cursor:
            cursor.execute(
                'SELECT end_date, course_id, username, '
                'forum_posts, forum_responses, forum_comments, '
                'forum_upvotes_given, forum_upvotes_received '
                'FROM student_engagement_{interval_type} WHERE course_id="{course_id}" '
                'ORDER BY end_date, username;'.format(
                    course_id=self.COURSE_ID, interval_type=interval_type))
            results = cursor.fetchall()

        self.assertItemsEqual(results, [
            (end_date_expected, self.COURSE_ID, 'audit', 1, 0, 0, 3, 1),
            (end_date_expected, self.COURSE_ID, 'honor', 1, 1, 0, 0, 2),
            (end_date_expected, self.COURSE_ID, 'staff', 2, 0, 0, 1, 2),
            (end_date_expected, self.COURSE_ID, 'verified', 0, 0, 1, 1, 0),
        ])