class AnswerDistributionOneFilePerCourseTaskTest(MapperTestMixin, ReducerTestMixin, TestCase):
    """Tests for AnswerDistributionOneFilePerCourseTask class."""

    def setUp(self):
        self.task_class = AnswerDistributionOneFilePerCourseTask
        super(AnswerDistributionOneFilePerCourseTaskTest, self).setUp()
        self.task = AnswerDistributionOneFilePerCourseTask(
            mapreduce_engine='local',
            src=[],
            dest=None,
            name=None,
            include=[],
            output_root=None,
        )
        # The output_root tests below act on a real directory, so create one
        # here and clean it up afterwards. (Assumes tempfile and shutil are
        # imported at the top of the module.)
        self.output_root = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.output_root, ignore_errors=True)

    def test_map_single_value(self):
        self.assert_single_map_output('foo\tbar', 'foo', 'bar')

    def test_reduce_multiple_values(self):
        field_names = AnswerDistributionPerCourseMixin.get_column_order()

        # To test sorting, the first sample is made to sort after the
        # second sample.
        column_values_2 = [(k, unicode(k) + u'\u2603') for k in field_names]
        column_values_2[3] = (column_values_2[3][0], 10)
        column_values_1 = list(column_values_2)
        column_values_1[4] = (column_values_1[4][0], u'ZZZZZZZZZZZ')
        sample_input_1 = json.dumps(dict(column_values_1))
        sample_input_2 = json.dumps(dict(column_values_2))
        mock_output_file = Mock()

        self.task.multi_output_reducer('foo', iter([sample_input_1, sample_input_2]), mock_output_file)

        # The header row is written first, in the canonical column order.
        expected_header_string = ','.join(field_names) + '\r\n'
        self.assertEquals(mock_output_file.write.mock_calls[0], call(expected_header_string))

        # Confirm that the second sample appears before the first.
        expected_row_1 = ','.join(unicode(v[1]).encode('utf8') for v in column_values_2) + '\r\n'
        self.assertEquals(mock_output_file.write.mock_calls[1], call(expected_row_1))

        expected_row_2 = ','.join(unicode(v[1]).encode('utf8') for v in column_values_1) + '\r\n'
        self.assertEquals(mock_output_file.write.mock_calls[2], call(expected_row_2))

    def test_output_path_for_legacy_key(self):
        course_id = 'foo/bar/baz'
        hashed_course_id = hashlib.sha1(course_id).hexdigest()
        task = AnswerDistributionOneFilePerCourseTask(
            mapreduce_engine='local',
            src=[],
            dest=None,
            name='name',
            include=[],
            output_root='/tmp',
        )
        output_path = task.output_path_for_key(course_id)
        expected_output_path = '/tmp/{0}/foo_bar_baz_answer_distribution.csv'.format(hashed_course_id)
        self.assertEquals(output_path, expected_output_path)

    def test_output_path_for_opaque_key(self):
        course_id = str(CourseLocator(org='foo', course='bar', run='baz'))
        hashed_course_id = hashlib.sha1(course_id).hexdigest()
        task = AnswerDistributionOneFilePerCourseTask(
            mapreduce_engine='local',
            src=[],
            dest=None,
            name='name',
            include=[],
            output_root='/tmp',
        )
        output_path = task.output_path_for_key(course_id)
        expected_output_path = '/tmp/{0}/foo_bar_baz_answer_distribution.csv'.format(hashed_course_id)
        self.assertEquals(output_path, expected_output_path)

    def test_delete_output_root(self):
        # It's still possible to use the delete option
        # to get rid of the output_root directory.
        task = AnswerDistributionOneFilePerCourseTask(
            mapreduce_engine='local',
            src=[],
            dest=None,
            name='name',
            include=[],
            output_root=self.output_root,
            delete_output_root="true",
            marker=self.output_root,
        )
        self.assertFalse(task.complete())
        self.assertFalse(os.path.exists(self.output_root))

    def test_no_delete_output_root(self):
        # Not using the delete_output_root option will
        # not delete the output_root.
        self.assertTrue(os.path.exists(self.output_root))
        AnswerDistributionOneFilePerCourseTask(
            mapreduce_engine='local',
            src=[],
            dest=None,
            name='name',
            include=[],
            output_root=self.output_root,
        )
        self.assertTrue(os.path.exists(self.output_root))
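
# Illustrative sketch only: the two output-path tests above assert that each
# course's CSV lands under a directory named by the SHA-1 hex digest of the
# raw course_id, with a filename built from the course's org, course, and run
# joined by underscores. The helper below is NOT the task's implementation;
# its name and signature are made up here just to spell out the convention
# those assertions check.
def _expected_answer_distribution_path(output_root, course_id, org, course, run):
    """Build the output path the tests above expect for a given course."""
    hashed_course_id = hashlib.sha1(course_id).hexdigest()
    filename = '{0}_{1}_{2}_answer_distribution.csv'.format(org, course, run)
    return '{0}/{1}/{2}'.format(output_root, hashed_course_id, filename)

# For example, for the legacy key case:
#     _expected_answer_distribution_path('/tmp', 'foo/bar/baz', 'foo', 'bar', 'baz')
# produces the same value as expected_output_path in test_output_path_for_legacy_key.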