def test_answer_distribution(self):
    """Launch AnswerDistributionOneFilePerCourseTask, then validate its output."""
    # Flag/value pairs, listed in the order they are handed to the task.
    options = [
        ('--src', as_list_param(self.test_src)),
        ('--dest', url_path_join(self.test_root, 'dst')),
        ('--name', 'test'),
        ('--output-root', self.test_out),
        ('--include', as_list_param('"*"')),
        ('--manifest', url_path_join(self.test_root, 'manifest.txt')),
        ('--base-input-format', self.input_format),
        ('--lib-jar', as_list_param(self.oddjob_jar)),
        ('--n-reduce-tasks', str(self.NUM_REDUCERS)),
    ]

    # Flatten the pairs behind the task name to get the final argument list.
    launch_args = ['AnswerDistributionOneFilePerCourseTask']
    for flag, value in options:
        launch_args.extend((flag, value))

    self.task.launch(launch_args)
    self.validate_output()
def test_answer_distribution_mysql(self):
    """Launch AnswerDistributionToMySQLTaskWorkflow, then validate its output."""
    source = as_list_param(self.test_src)
    destination = url_path_join(self.test_root, 'dst')
    include_everything = as_list_param('"*"')
    manifest = url_path_join(self.test_root, 'manifest.txt')
    input_format = self.input_format
    lib_jars = as_list_param(self.oddjob_jar)
    reducer_count = str(self.NUM_REDUCERS)
    credentials = self.export_db.credentials_file_url

    launch_args = [
        'AnswerDistributionToMySQLTaskWorkflow',
        '--src', source,
        '--dest', destination,
        '--name', 'test',
        '--include', include_everything,
        '--manifest', manifest,
        '--base-input-format', input_format,
        '--lib-jar', lib_jars,
        '--n-reduce-tasks', reducer_count,
        '--credentials', credentials,
    ]
    self.task.launch(launch_args)

    self.validate_output()
def test_event_log_exports_using_manifest(self):
    """Launch EventExportTask once per environment, then validate the output.

    Every launch receives the same configuration override, which sets the
    ``manifest`` section's ``threshold`` to 1.
    """
    manifest_override = {'manifest': {'threshold': 1}}

    # Each environment is paired with the text passed as --required-path-text.
    environment_folders = [
        ('edx', self.PROD_FOLDER),
        ('edge', self.EDGE_FOLDER),
    ]

    for environment, required_path_text in environment_folders:
        launch_args = [
            'EventExportTask',
            '--source', as_list_param(url_path_join(self.test_src, environment)),
            '--output-root', self.test_out,
            '--config', self.test_config,
            '--environment', environment,
            '--interval', '2014-05',
            '--gpg-key-dir', self.test_gpg_key_dir,
            '--gpg-master-key', '*****@*****.**',
            '--required-path-text', required_path_text,
            '--n-reduce-tasks', str(self.NUM_REDUCERS),
        ]
        self.task.launch(launch_args, manifest_override)

    self.validate_output()
def run_and_check(self, interval_type):
    """Run StudentEngagementToMysqlTask and check the forum counts it stored.

    The task is launched over the interval ``2015-09-01-2015-09-16``; the rows
    for ``self.COURSE_ID`` are then read back from the
    ``student_engagement_<interval_type>`` table and compared with the
    expected per-user forum activity.

    Args:
        interval_type: Either ``'weekly'`` or ``'daily'``.

    Raises:
        AssertionError: If ``interval_type`` is not supported, or if the rows
            read back differ from the expected ones.
    """
    # Validate before any work is done: an unsupported type should fail with a
    # clear message instead of after a full task run and a query against a
    # table derived from the bad value. An explicit raise is used because
    # ``assert`` statements are stripped when Python runs with -O.
    expected_end_dates = {
        'weekly': datetime.date(2015, 9, 15),
        'daily': datetime.date(2015, 9, 14),
    }
    if interval_type not in expected_end_dates:
        raise AssertionError("Invalid interval type: {}".format(interval_type))
    end_date_expected = expected_end_dates[interval_type]

    self.task.launch([
        'StudentEngagementToMysqlTask',
        '--source', as_list_param(self.test_src),
        '--credentials', self.export_db.credentials_file_url,
        '--n-reduce-tasks', str(self.NUM_REDUCERS),
        '--interval', '2015-09-01-2015-09-16',
        '--interval-type', interval_type,
    ])

    with self.export_db.cursor() as cursor:
        # The table name depends on the interval type, so it is formatted in.
        cursor.execute(
            'SELECT end_date, course_id, username, '
            'forum_posts, forum_responses, forum_comments, '
            'forum_upvotes_given, forum_upvotes_received '
            'FROM student_engagement_{interval_type} WHERE course_id="{course_id}" '
            'ORDER BY end_date, username;'
            .format(course_id=self.COURSE_ID, interval_type=interval_type)
        )
        results = cursor.fetchall()

    self.assertItemsEqual(results, [
        (end_date_expected, self.COURSE_ID, 'audit', 1, 0, 0, 3, 1),
        (end_date_expected, self.COURSE_ID, 'honor', 1, 1, 0, 0, 2),
        (end_date_expected, self.COURSE_ID, 'staff', 2, 0, 0, 1, 2),
        (end_date_expected, self.COURSE_ID, 'verified', 0, 0, 1, 1, 0),
    ])
def test_location_by_course(self):
    """Check the rows InsertToMysqlLastCountryPerCourseTask leaves in MySQL.

    Uploads the tracking log, loads the SQL fixtures, launches the task and
    compares columns 1-5 of every row in ``course_enrollment_location_current``
    with the expected tuples (date, course id, country code and two counts).
    """
    self.upload_tracking_log(self.INPUT_FILE, self.START_DATE)

    for fixture_file_name in self.SQL_FIXTURES:
        self.execute_sql_fixture_file(fixture_file_name)

    # The rows are expected to carry the UTC date on which the task ran. If the
    # run straddles the UTC day boundary, a single date sampled afterwards may
    # be one day later than the date the task saw. Sample the date on both
    # sides of the launch so either outcome can be recognised.
    date_before_launch = datetime.utcnow().date()
    self.task.launch([
        'InsertToMysqlLastCountryPerCourseTask',
        '--source', as_list_param(self.test_src),
        '--interval', self.DATE_INTERVAL.to_string(),
        '--n-reduce-tasks', str(self.NUM_REDUCERS),
    ])
    date_after_launch = datetime.utcnow().date()

    self.maxDiff = None

    with self.export_db.cursor() as cursor:
        cursor.execute('SELECT * FROM course_enrollment_location_current ORDER BY country_code, course_id')
        results = cursor.fetchall()

    observed_rows = [row[1:6] for row in results]

    # When both samples agree this is the same check as a single "today".
    # Otherwise use the earlier date only if the rows actually carry it.
    expected_date = date_after_launch
    if date_before_launch != date_after_launch:
        if any(row[0] == date_before_launch for row in observed_rows):
            expected_date = date_before_launch

    self.assertItemsEqual(observed_rows, [
        (expected_date, self.COURSE_ID, None, 1, 1),
        (expected_date, self.COURSE_ID, 'UNKNOWN', 0, 1),
        (expected_date, self.COURSE_ID, 'IE', 1, 1),
        (expected_date, self.COURSE_ID2, 'TH', 1, 1),
        (expected_date, self.COURSE_ID, 'TH', 1, 1),
    ])
def test_event_log_exports_using_manifest(self):
    """Launch EventExportTask for 'prod' and then 'edge', then validate the output.

    Both launches share one configuration override that sets the ``manifest``
    section's ``threshold`` to 1.
    """
    config_override = {
        'manifest': {
            'threshold': 1,
        },
    }
    required_text_by_environment = {
        'prod': self.PROD_FOLDER,
        'edge': self.EDGE_FOLDER,
    }

    # A tuple fixes the launch order; it does not depend on dict ordering.
    for environment in ('prod', 'edge'):
        environment_source = url_path_join(self.test_src, environment)
        self.task.launch(
            [
                'EventExportTask',
                '--source', as_list_param(environment_source),
                '--output-root', self.test_out,
                '--config', self.test_config,
                '--environment', environment,
                '--interval', '2014-05',
                '--gpg-key-dir', self.test_gpg_key_dir,
                '--gpg-master-key', '*****@*****.**',
                '--required-path-text', required_text_by_environment[environment],
                '--n-reduce-tasks', str(self.NUM_REDUCERS),
            ],
            config_override,
        )

    self.validate_output()
def launch_task(self, output_root, extra_source=None, run_with_validation_events=True):
    """Run the enrollment validation workflow.

    Args:
        output_root: Value passed to the workflow as ``--output-root``.
        extra_source: Optional second source. When given, ``--source`` lists
            both ``self.test_src`` and this value, and ``--generate-before``
            is not passed.
        run_with_validation_events: When true, the wider date interval is used
            and ``--expected-validation`` is passed, built from
            ``self.END_DATE``.
    """
    # Widen the interval to include the latest validation events.
    interval = self.WIDER_DATE_INTERVAL if run_with_validation_events else self.DATE_INTERVAL

    source_pattern = '[\\".*?.log-.*.gz\\"]'
    # Raw string: ``\d`` and ``\.`` are regex escapes, not Python string
    # escapes. Leaving them in a normal literal triggers an invalid-escape
    # warning on newer interpreters. The resulting value is unchanged.
    validation_pattern = r'".*?enroll_validated_\d{8}\.log\.gz"'

    launch_args = [
        'EnrollmentValidationWorkflow',
        '--interval', interval,
        '--validation-root', self.test_validate,
        '--validation-pattern', validation_pattern,
        '--credentials', self.import_db.credentials_file_url,
        '--n-reduce-tasks', str(self.NUM_REDUCERS),
        '--pattern', source_pattern,
        '--output-root', output_root,
    ]

    # An extra source means we're using synthetic events, so we
    # don't want to generate outside the interval in that case.
    if extra_source:
        launch_args.extend(['--source', '[\\"{}\\",\\"{}\\"]'.format(self.test_src, extra_source)])
    else:
        launch_args.extend(['--source', as_list_param(self.test_src)])
        launch_args.append('--generate-before')

    if run_with_validation_events:
        launch_args.extend(['--expected-validation', "{}T00".format(self.END_DATE)])

    self.task.launch(launch_args)
def test_user_activity(self):
    """Run InsertToMysqlCourseActivityTask and check the weekly activity counts.

    The tracking log is uploaded, the task is launched, and every row of the
    ``course_activity`` table is compared with the expected
    (course id, interval start, interval end, label, count) tuples.
    """
    self.maxDiff = None
    self.upload_tracking_log(self.INPUT_FILE, self.END_DATE)

    launch_args = [
        'InsertToMysqlCourseActivityTask',
        '--source', as_list_param(self.test_src),
        '--end-date', self.END_DATE.isoformat(),
        '--weeks', str(self.NUM_WEEKS),
        '--credentials', self.export_db.credentials_file_url,
        '--overwrite-n-days', '43',
        '--n-reduce-tasks', str(self.NUM_REDUCERS),
        '--overwrite-mysql',
    ]
    self.task.launch(launch_args)

    query = (
        'SELECT course_id, interval_start, interval_end, label, count '
        'FROM course_activity '
        'ORDER BY course_id, interval_end, label'
    )
    with self.export_db.cursor() as cursor:
        cursor.execute(query)
        results = cursor.fetchall()

    # Week boundaries that appear in the expected rows.
    may_19 = datetime.datetime(2014, 5, 19, 0, 0)
    may_26 = datetime.datetime(2014, 5, 26, 0, 0)
    jun_09 = datetime.datetime(2014, 6, 9, 0, 0)
    jun_16 = datetime.datetime(2014, 6, 16, 0, 0)
    jun_23 = datetime.datetime(2014, 6, 23, 0, 0)

    expected_rows = [
        (self.COURSE_ID2, may_19, may_26, 'ACTIVE', 1),
        (self.COURSE_ID2, may_19, may_26, 'PLAYED_VIDEO', 1),
        (self.COURSE_ID2, jun_16, jun_23, 'ACTIVE', 4),
        (self.COURSE_ID2, jun_16, jun_23, 'ATTEMPTED_PROBLEM', 1),
        (self.COURSE_ID2, jun_16, jun_23, 'PLAYED_VIDEO', 3),
        (self.COURSE_ID, jun_09, jun_16, 'ACTIVE', 1),
        (self.COURSE_ID, jun_09, jun_16, 'PLAYED_VIDEO', 1),
        (self.COURSE_ID, jun_16, jun_23, 'ACTIVE', 4),
        (self.COURSE_ID, jun_16, jun_23, 'ATTEMPTED_PROBLEM', 2),
        (self.COURSE_ID, jun_16, jun_23, 'PLAYED_VIDEO', 3),
    ]
    self.assertItemsEqual(list(results), expected_rows)
def run_task(self, interval_type):
    """Launch StudentEngagementCsvFileTask for the given interval type.

    The ``--output-root`` handed to the task is ``self.test_out`` joined with
    ``interval_type``; the interval itself comes from ``self.interval``.
    """
    source = as_list_param(self.test_src)
    output_root = url_path_join(self.test_out, interval_type)
    reducer_count = str(self.NUM_REDUCERS)

    launch_args = [
        'StudentEngagementCsvFileTask',
        '--source', source,
        '--output-root', output_root,
        '--n-reduce-tasks', reducer_count,
        '--interval', self.interval,
        '--interval-type', interval_type,
    ]
    self.task.launch(launch_args)
def test_database_import(self):
    """Launch ImportMysqlToVerticaTask with --overwrite, then validate the output."""
    launch_args = ['ImportMysqlToVerticaTask']
    launch_args += ['--date', self.DATE]
    launch_args += ['--marker-schema', 'acceptance_marker']
    # Regex for the fields to leave out, wrapped as a list parameter.
    launch_args += ['--exclude-field', as_list_param('.*\\.field_to_exclude$')]
    launch_args.append('--overwrite')

    self.task.launch(launch_args)
    self.validate_output()
def run_obfuscated_package_task(self):
    """Launch ObfuscatedPackageTask for the course under test.

    The obfuscated output root is ``self.test_root`` joined with
    ``'obfuscated-output'``; the package output root is ``self.test_out``.
    """
    obfuscated_root = url_path_join(self.test_root, 'obfuscated-output')
    recipients = as_list_param('*****@*****.**')

    launch_args = [
        'ObfuscatedPackageTask',
        '--course', self.filename_safe_course_id,
        '--obfuscated-output-root', obfuscated_root,
        '--gpg-key-dir', self.test_gpg_key_dir,
        '--gpg-master-key', '*****@*****.**',
        '--output-root', self.test_out,
        '--recipient', recipients,
        '--format-version', self.FORMAT_VERSION,
    ]
    self.task.launch(launch_args)
def test_base(self):
    """Run TagsDistributionWorkflow on the uploaded log and validate the result.

    The tracking log is uploaded for 2015-08-01 and the
    ``load_auth_userprofile.sql`` fixture is executed before the launch.
    """
    log_date = datetime.date(2015, 8, 1)
    self.upload_tracking_log(self.INPUT_FILE, log_date)
    self.execute_sql_fixture_file('load_auth_userprofile.sql')

    options = [
        ('--source', as_list_param(self.test_src)),
        ('--interval', '2010-01-01-2020-01-01'),
        ('--n-reduce-tasks', str(self.NUM_REDUCERS)),
        # The empty final component is part of the original join call.
        ('--output-root', url_path_join(self.test_out, 'tags_dist_acceptance', '')),
        ('--database', self.export_db.database_name),
    ]
    launch_args = ['TagsDistributionWorkflow']
    for flag, value in options:
        launch_args.extend((flag, value))
    self.task.launch(launch_args)

    self.validate_base()
def test_user_activity(self):
    """Launch InsertToMysqlCourseActivityTask and verify the ``course_activity`` rows.

    After the task finishes, all rows are read back and compared with the
    expected (course id, interval start, interval end, label, count) tuples.
    """
    self.maxDiff = None
    self.upload_tracking_log(self.INPUT_FILE, self.END_DATE)

    self.task.launch(
        [
            'InsertToMysqlCourseActivityTask',
            '--source', as_list_param(self.test_src),
            '--end-date', self.END_DATE.isoformat(),
            '--weeks', str(self.NUM_WEEKS),
            '--credentials', self.export_db.credentials_file_url,
            '--overwrite-n-days', '43',
            '--n-reduce-tasks', str(self.NUM_REDUCERS),
            '--overwrite-mysql',
        ]
    )

    with self.export_db.cursor() as cursor:
        cursor.execute(
            'SELECT course_id, interval_start, interval_end, label, count '
            'FROM course_activity ORDER BY course_id, interval_end, label'
        )
        results = cursor.fetchall()

    def rows_for(course_id, start, end, label_counts):
        """Expand (label, count) pairs into full expected rows for one week."""
        return [(course_id, start, end, label, count) for label, count in label_counts]

    expected = []
    expected += rows_for(
        self.COURSE_ID2,
        datetime.datetime(2014, 5, 19, 0, 0),
        datetime.datetime(2014, 5, 26, 0, 0),
        [('ACTIVE', 1), ('PLAYED_VIDEO', 1)],
    )
    expected += rows_for(
        self.COURSE_ID2,
        datetime.datetime(2014, 6, 16, 0, 0),
        datetime.datetime(2014, 6, 23, 0, 0),
        [('ACTIVE', 4), ('ATTEMPTED_PROBLEM', 1), ('PLAYED_VIDEO', 3)],
    )
    expected += rows_for(
        self.COURSE_ID,
        datetime.datetime(2014, 6, 9, 0, 0),
        datetime.datetime(2014, 6, 16, 0, 0),
        [('ACTIVE', 1), ('PLAYED_VIDEO', 1)],
    )
    expected += rows_for(
        self.COURSE_ID,
        datetime.datetime(2014, 6, 16, 0, 0),
        datetime.datetime(2014, 6, 23, 0, 0),
        [('ACTIVE', 4), ('ATTEMPTED_PROBLEM', 2), ('PLAYED_VIDEO', 3)],
    )

    self.assertItemsEqual(list(results), expected)
def launch_task(self, output_root, extra_source=None, run_with_validation_events=True):
    """Run the enrollment validation workflow.

    Args:
        output_root: Passed through to the workflow as ``--output-root``.
        extra_source: Optional additional source. If provided, ``--source``
            names both ``self.test_src`` and ``extra_source`` and the
            ``--generate-before`` flag is omitted.
        run_with_validation_events: If true, ``self.WIDER_DATE_INTERVAL`` is
            used instead of ``self.DATE_INTERVAL`` and ``--expected-validation``
            is added, derived from ``self.END_DATE``.
    """
    # Widen the interval to include the latest validation events.
    if run_with_validation_events:
        interval = self.WIDER_DATE_INTERVAL
    else:
        interval = self.DATE_INTERVAL

    source_pattern = '[\\".*?.log-.*.gz\\"]'
    # A raw literal keeps the regex escapes (``\d``, ``\.``) out of Python's
    # own escape processing. In a normal literal they are invalid escape
    # sequences and produce a warning. The string value is unchanged.
    validation_pattern = r'".*?enroll_validated_\d{8}\.log\.gz"'

    launch_args = [
        'EnrollmentValidationWorkflow',
        '--interval', interval,
        '--validation-root', self.test_validate,
        '--validation-pattern', validation_pattern,
        '--credentials', self.import_db.credentials_file_url,
        '--n-reduce-tasks', str(self.NUM_REDUCERS),
        '--pattern', source_pattern,
        '--output-root', output_root,
    ]

    # An extra source means we're using synthetic events, so we
    # don't want to generate outside the interval in that case.
    if extra_source:
        combined_sources = '[\\"{}\\",\\"{}\\"]'.format(self.test_src, extra_source)
        launch_args.extend(['--source', combined_sources])
    else:
        launch_args.extend(['--source', as_list_param(self.test_src), '--generate-before'])

    if run_with_validation_events:
        launch_args.extend(['--expected-validation', "{}T00".format(self.END_DATE)])

    self.task.launch(launch_args)
def run_and_check(self, interval_type):
    """Launch StudentEngagementToMysqlTask and verify the stored forum activity.

    After the task has run over ``2015-09-01-2015-09-16``, the rows for
    ``self.COURSE_ID`` in ``student_engagement_<interval_type>`` are fetched
    and compared with the expected counts for each user.

    Args:
        interval_type: ``'weekly'`` or ``'daily'``.

    Raises:
        AssertionError: If ``interval_type`` is anything else, or if the
            fetched rows do not match the expected ones.
    """
    # Resolve the expected end date first so an unsupported interval type is
    # rejected before the task is launched or any SQL is built from it. This
    # raises explicitly instead of using ``assert False``, which is removed
    # under ``python -O`` and would leave the expected date undefined.
    if interval_type == 'weekly':
        end_date_expected = datetime.date(2015, 9, 15)
    elif interval_type == 'daily':
        end_date_expected = datetime.date(2015, 9, 14)
    else:
        raise AssertionError("Invalid interval type: {}".format(interval_type))

    self.task.launch([
        'StudentEngagementToMysqlTask',
        '--source', as_list_param(self.test_src),
        '--credentials', self.export_db.credentials_file_url,
        '--n-reduce-tasks', str(self.NUM_REDUCERS),
        '--interval', '2015-09-01-2015-09-16',
        '--interval-type', interval_type,
    ])

    query = (
        'SELECT end_date, course_id, username, '
        'forum_posts, forum_responses, forum_comments, '
        'forum_upvotes_given, forum_upvotes_received '
        'FROM student_engagement_{interval_type} WHERE course_id="{course_id}" '
        'ORDER BY end_date, username;'
    ).format(course_id=self.COURSE_ID, interval_type=interval_type)

    with self.export_db.cursor() as cursor:
        cursor.execute(query)
        results = cursor.fetchall()

    self.assertItemsEqual(results, [
        (end_date_expected, self.COURSE_ID, 'audit', 1, 0, 0, 3, 1),
        (end_date_expected, self.COURSE_ID, 'honor', 1, 1, 0, 0, 2),
        (end_date_expected, self.COURSE_ID, 'staff', 2, 0, 0, 1, 2),
        (end_date_expected, self.COURSE_ID, 'verified', 0, 0, 1, 1, 0),
    ])