class ModuleEngagementUserSegmentDataTaskReducerTest(ReducerTestMixin, TestCase):
    """Tests for the reducer of ModuleEngagementUserSegmentDataTask.

    Each test loads metric ranges into the task through ``init_local`` and then
    feeds serialized engagement summary records to the reducer, checking which
    segments appear in the emitted records.
    """

    task_class = ModuleEngagementUserSegmentDataTask
    output_record_type = ModuleEngagementUserSegmentRecord

    def setUp(self):
        """Build the default summary record, range record and task under test."""
        self.course_id = 'foo/bar/baz'
        self.username = '******'
        self.prev_week_start_date = datetime.date(2014, 3, 18)
        self.start_date = datetime.date(2014, 3, 25)
        self.date = datetime.date(2014, 4, 1)
        self.reduce_key = (self.course_id, self.username)

        # A summary with no activity at all; tests override single fields.
        self.input_record = ModuleEngagementSummaryRecord(
            course_id=self.course_id,
            username=self.username,
            start_date=self.start_date,
            end_date=self.date,
            problem_attempts=0,
            problems_attempted=0,
            problems_completed=0,
            problem_attempts_per_completed=0.0,
            videos_viewed=0,
            discussion_contributions=0,
            days_active=0,
        )

        # Default "high" range of [5.0, 10.0) on problems_attempted.
        self.range_record = ModuleEngagementSummaryMetricRangeRecord(
            course_id=self.course_id,
            start_date=self.start_date,
            end_date=self.date,
            metric='problems_attempted',
            range_type='high',
            low_value=5.0,
            high_value=10.0
        )

        self.task = self.task_class(  # pylint: disable=not-callable
            date=self.date,
            output_root=self.DEFAULT_ARGS['output_root'],
            overwrite_from_date=datetime.date(2014, 4, 1),
        )

    def initialize_task(self, metric_ranges):
        """Given a list of metric ranges, setup the task by calling init_local"""
        metric_ranges_text = '\n'.join(r.to_separated_values() for r in metric_ranges)
        # init_local reads the ranges from the 'range_data' input target.
        self.task.input_local = MagicMock(return_value={
            'range_data': FakeTarget(value=metric_ranges_text)
        })
        self.task.init_local()

    def assert_in_segment(self, output, segment):
        """Assert that the user was put into the provided segment.

        Fails the test when no record in ``output`` carries ``segment``.
        Previously this helper only returned a boolean that no caller checked,
        so the tests relying on it could never fail.
        """
        segments = [
            self.output_record_type.from_string_tuple(record_tuple).segment
            for record_tuple in output
        ]
        self.assertIn(segment, segments)
        # Kept for any caller that inspected the old boolean result.
        return True

    def assert_not_in_segment(self, output, segment):
        """Assert that the user was not put into the provided segment."""
        for record_tuple in output:
            record = self.output_record_type.from_string_tuple(record_tuple)
            self.assertNotEqual(record.segment, segment)

    def test_init_local(self):
        """Only 'high' ranges end up in high_metric_ranges, keyed by course then metric."""
        other_course_record = self.range_record.replace(
            course_id='another/course/id',
            metric='problems_completed'
        )
        self.initialize_task([
            self.range_record,
            self.range_record.replace(
                range_type='low',
                low_value=0.0,
                high_value=3.0
            ),
            other_course_record
        ])

        self.assertEqual(dict(self.task.high_metric_ranges), {
            self.course_id: {
                'problems_attempted': self.range_record
            },
            'another/course/id': {
                'problems_completed': other_course_record
            }
        })

    def test_init_local_empty_input(self):
        """No range data yields an empty high_metric_ranges mapping."""
        self.initialize_task([])
        self.assertEqual(dict(self.task.high_metric_ranges), {})

    def test_output_format(self):
        """Check the complete tuples emitted for a user in two segments."""
        self.initialize_task([
            self.range_record,
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                low_value=8.0,
                high_value=10.1
            )
        ])

        # Use the fixture's identifiers so the expectation cannot drift from setUp.
        self._check_output_complete_tuple(
            [
                self.input_record.replace(
                    problems_attempted=6,
                    problem_attempts_per_completed=9
                ).to_separated_values()
            ],
            (
                (
                    self.course_id,
                    self.username,
                    '2014-03-25',
                    '2014-04-01',
                    'highly_engaged',
                    'problems_attempted'
                ),
                (
                    self.course_id,
                    self.username,
                    '2014-03-25',
                    '2014-04-01',
                    'struggling',
                    'problem_attempts_per_completed'
                ),
            )
        )

    # NOTE(review): `@data` only expands into per-value tests when the class is
    # decorated with ddt's `@ddt`; that decorator is not visible here - confirm.
    @data(
        'problems_attempted',
        'problems_completed',
        'videos_viewed',
        'discussion_contributions'
    )
    def test_highly_engaged(self, metric):
        """A value inside the high range of these metrics marks the user highly engaged."""
        self.initialize_task([
            self.range_record.replace(metric=metric)
        ])

        self._check_output_by_record_field(
            [
                self.input_record.replace(**{metric: 8}).to_separated_values()
            ],
            {
                'segment': 'highly_engaged'
            }
        )

    @data(
        'problem_attempts',
        'problem_attempts_per_completed',
    )
    def test_not_highly_engaged(self, metric):
        """High values of these metrics must not mark the user highly engaged."""
        self.initialize_task([
            self.range_record.replace(metric=metric)
        ])

        output = self._get_reducer_output(
            [
                self.input_record.replace(**{metric: 8}).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_highly_engaged_too_low(self):
        """A value below the high range is not highly engaged."""
        self.initialize_task([
            self.range_record.replace(metric='problems_completed')
        ])

        output = self._get_reducer_output(
            [
                self.input_record.replace(problems_completed=0).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_highly_engaged_left_closed_interval_bottom(self):
        """A value equal to the range's low_value counts as inside the range."""
        self.initialize_task([
            self.range_record.replace(
                metric='problems_completed',
                low_value=6.0
            )
        ])

        output = self._get_reducer_output(
            [
                self.input_record.replace(problems_completed=6).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'highly_engaged')

    def test_highly_engaged_left_closed_interval_top(self):
        """A value equal to the range's high_value counts as outside the range."""
        self.initialize_task([
            self.range_record.replace(
                metric='problems_completed',
                high_value=9.0
            )
        ])

        output = self._get_reducer_output(
            [
                self.input_record.replace(problems_completed=9).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_disengaging(self):
        """Activity only in the previous week puts the user in 'disengaging'."""
        self.initialize_task([])

        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    start_date=self.prev_week_start_date,
                    end_date=self.start_date,
                    days_active=1,
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'disengaging')

    def test_not_disengaging_only_recent(self):
        """Activity only in the current week is not 'disengaging'."""
        self.initialize_task([])

        output = self._get_reducer_output(
            [
                self.input_record.replace(days_active=1).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'disengaging')

    def test_struggling(self):
        """problem_attempts_per_completed inside the high range means 'struggling'."""
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
            )
        ])

        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=8.0
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_low_high_value(self):
        """An infinite value with an [inf, inf] high range is 'struggling'."""
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                low_value=float('inf'),
                high_value=float('inf'),
            )
        ])

        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=float('inf')
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_high(self):
        """A finite value in a range with an infinite upper bound is 'struggling'."""
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                high_value=float('inf'),
            )
        ])

        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=10.0
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_high_value(self):
        """An infinite value in a range with an infinite upper bound is 'struggling'."""
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                high_value=float('inf'),
            )
        ])

        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=float('inf')
                ).to_separated_values()
            ]
        )
        self.assert_in_segment(output, 'struggling')

    def test_not_struggling(self):
        """A value below the high range is not 'struggling'."""
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
            )
        ])

        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=3.0
                ).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'struggling')

    def test_not_struggling_infinite_low(self):
        """A large finite value is not 'struggling' when the range starts at infinity."""
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                low_value=float('inf')
            )
        ])

        output = self._get_reducer_output(
            [
                self.input_record.replace(
                    problem_attempts_per_completed=100000.0
                ).to_separated_values()
            ]
        )
        self.assert_not_in_segment(output, 'struggling')
class ModuleEngagementUserSegmentDataTaskReducerTest(ReducerTestMixin, TestCase):
    """Reducer tests for ModuleEngagementUserSegmentDataTask.

    Metric ranges are loaded into the task via ``init_local``; serialized
    summary records are then reduced and the emitted segments are checked.
    """

    task_class = ModuleEngagementUserSegmentDataTask
    output_record_type = ModuleEngagementUserSegmentRecord

    def setUp(self):
        """Create the baseline summary record, range record and task instance."""
        self.course_id = 'foo/bar/baz'
        self.username = '******'
        self.prev_week_start_date = datetime.date(2014, 3, 18)
        self.start_date = datetime.date(2014, 3, 25)
        self.date = datetime.date(2014, 4, 1)
        self.reduce_key = (self.course_id, self.username)
        # Baseline summary with every activity counter at zero.
        self.input_record = ModuleEngagementSummaryRecord(
            course_id=self.course_id,
            username=self.username,
            start_date=self.start_date,
            end_date=self.date,
            problem_attempts=0,
            problems_attempted=0,
            problems_completed=0,
            problem_attempts_per_completed=0.0,
            videos_viewed=0,
            discussion_contributions=0,
            days_active=0,
        )
        # Baseline 'high' range for problems_attempted: low 5.0, high 10.0.
        self.range_record = ModuleEngagementSummaryMetricRangeRecord(
            course_id=self.course_id,
            start_date=self.start_date,
            end_date=self.date,
            metric='problems_attempted',
            range_type='high',
            low_value=5.0,
            high_value=10.0)
        self.task = self.task_class(  # pylint: disable=not-callable
            date=self.date,
            output_root=self.DEFAULT_ARGS['output_root'],
            overwrite_from_date=datetime.date(2014, 4, 1),
        )

    def initialize_task(self, metric_ranges):
        """Given a list of metric ranges, setup the task by calling init_local"""
        metric_ranges_text = '\n'.join(
            r.to_separated_values() for r in metric_ranges)
        # The task's local input is stubbed to serve the serialized ranges.
        self.task.input_local = MagicMock(
            return_value={'range_data': FakeTarget(value=metric_ranges_text)})
        self.task.init_local()

    def test_init_local(self):
        """high_metric_ranges holds only the 'high' ranges, by course and metric."""
        other_course_record = self.range_record.replace(
            course_id='another/course/id', metric='problems_completed')
        self.initialize_task([
            self.range_record,
            self.range_record.replace(range_type='low',
                                      low_value=0.0,
                                      high_value=3.0),
            other_course_record
        ])
        self.assertEqual(
            dict(self.task.high_metric_ranges), {
                self.course_id: {
                    'problems_attempted': self.range_record
                },
                'another/course/id': {
                    'problems_completed': other_course_record
                }
            })

    def test_init_local_empty_input(self):
        """With no ranges supplied, high_metric_ranges is empty."""
        self.initialize_task([])
        self.assertEqual(dict(self.task.high_metric_ranges), {})

    def test_output_format(self):
        """Verify the full output tuples for a user landing in two segments."""
        self.initialize_task([
            self.range_record,
            self.range_record.replace(metric='problem_attempts_per_completed',
                                      low_value=8.0,
                                      high_value=10.1)
        ])
        # Expected identifiers come from the fixture rather than duplicated
        # literals, so they always match the record that was fed in.
        self._check_output_complete_tuple([
            self.input_record.replace(
                problems_attempted=6,
                problem_attempts_per_completed=9).to_separated_values()
        ], (
            (self.course_id, self.username, '2014-03-25', '2014-04-01',
             'highly_engaged', 'problems_attempted'),
            (self.course_id, self.username, '2014-03-25', '2014-04-01',
             'struggling', 'problem_attempts_per_completed'),
        ))

    @data('problems_attempted', 'problems_completed', 'videos_viewed',
          'discussion_contributions')
    def test_highly_engaged(self, metric):
        """A value in the high range of these metrics yields 'highly_engaged'."""
        self.initialize_task([self.range_record.replace(metric=metric)])
        self._check_output_by_record_field(
            [self.input_record.replace(**{metric: 8}).to_separated_values()],
            {'segment': 'highly_engaged'})

    @data(
        'problem_attempts',
        'problem_attempts_per_completed',
    )
    def test_not_highly_engaged(self, metric):
        """These metrics never yield 'highly_engaged', even inside the high range."""
        self.initialize_task([self.range_record.replace(metric=metric)])
        output = self._get_reducer_output(
            [self.input_record.replace(**{metric: 8}).to_separated_values()])
        self.assert_not_in_segment(output, 'highly_engaged')

    def assert_not_in_segment(self, output, segment):
        """Assert that the user was not put into the provided segment."""
        for record_tuple in output:
            record = self.output_record_type.from_string_tuple(record_tuple)
            self.assertNotEqual(record.segment, segment)

    def test_highly_engaged_too_low(self):
        """A value under the range's low bound is not 'highly_engaged'."""
        self.initialize_task(
            [self.range_record.replace(metric='problems_completed')])
        output = self._get_reducer_output([
            self.input_record.replace(
                problems_completed=0).to_separated_values()
        ])
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_highly_engaged_left_closed_interval_bottom(self):
        """The low bound itself is treated as inside the range."""
        self.initialize_task([
            self.range_record.replace(metric='problems_completed',
                                      low_value=6.0)
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problems_completed=6).to_separated_values()
        ])
        self.assert_in_segment(output, 'highly_engaged')

    def assert_in_segment(self, output, segment):
        """Assert that the user was put into the provided segment.

        The test fails unless at least one record in ``output`` has the given
        segment. The earlier implementation merely returned True/False, which
        callers discarded, so nothing was actually being asserted.
        """
        found_segments = set()
        for record_tuple in output:
            record = self.output_record_type.from_string_tuple(record_tuple)
            found_segments.add(record.segment)
        self.assertIn(segment, found_segments)
        # Preserve the old truthy result for any caller that used it.
        return True

    def test_highly_engaged_left_closed_interval_top(self):
        """The high bound itself is treated as outside the range."""
        self.initialize_task([
            self.range_record.replace(metric='problems_completed',
                                      high_value=9.0)
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problems_completed=9).to_separated_values()
        ])
        self.assert_not_in_segment(output, 'highly_engaged')

    def test_disengaging(self):
        """A user active only during the previous week is 'disengaging'."""
        self.initialize_task([])
        output = self._get_reducer_output([
            self.input_record.replace(
                start_date=self.prev_week_start_date,
                end_date=self.start_date,
                days_active=1,
            ).to_separated_values()
        ])
        self.assert_in_segment(output, 'disengaging')

    def test_not_disengaging_only_recent(self):
        """A user active only during the current week is not 'disengaging'."""
        self.initialize_task([])
        output = self._get_reducer_output(
            [self.input_record.replace(days_active=1).to_separated_values()])
        self.assert_not_in_segment(output, 'disengaging')

    def test_struggling(self):
        """A problem_attempts_per_completed value in the high range is 'struggling'."""
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed', )
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problem_attempts_per_completed=8.0).to_separated_values()
        ])
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_low_high_value(self):
        """Infinity is 'struggling' when both range bounds are infinite."""
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                low_value=float('inf'),
                high_value=float('inf'),
            )
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problem_attempts_per_completed=float('inf')
            ).to_separated_values()
        ])
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_high(self):
        """A finite value is 'struggling' when only the upper bound is infinite."""
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                high_value=float('inf'),
            )
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problem_attempts_per_completed=10.0).to_separated_values()
        ])
        self.assert_in_segment(output, 'struggling')

    def test_struggling_infinite_high_value(self):
        """Infinity is 'struggling' when only the upper bound is infinite."""
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed',
                high_value=float('inf'),
            )
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problem_attempts_per_completed=float('inf')
            ).to_separated_values()
        ])
        self.assert_in_segment(output, 'struggling')

    def test_not_struggling(self):
        """A value under the range's low bound is not 'struggling'."""
        self.initialize_task([
            self.range_record.replace(
                metric='problem_attempts_per_completed', )
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problem_attempts_per_completed=3.0).to_separated_values()
        ])
        self.assert_not_in_segment(output, 'struggling')

    def test_not_struggling_infinite_low(self):
        """A large finite value is not 'struggling' when the low bound is infinite."""
        self.initialize_task([
            self.range_record.replace(metric='problem_attempts_per_completed',
                                      low_value=float('inf'))
        ])
        output = self._get_reducer_output([
            self.input_record.replace(
                problem_attempts_per_completed=100000.0).to_separated_values()
        ])
        self.assert_not_in_segment(output, 'struggling')
class ModuleEngagementSummaryMetricRangesDataTaskReducerTest(ReducerTestMixin, TestCase):
    """Reducer tests for the ranges emitted by ModuleEngagementSummaryMetricRangesDataTask."""

    task_class = ModuleEngagementSummaryMetricRangesDataTask
    output_record_type = ModuleEngagementSummaryMetricRangeRecord

    def setUp(self):
        super(ModuleEngagementSummaryMetricRangesDataTaskReducerTest, self).setUp()

        self.reduce_key = 'foo/bar/baz'

        # Template summary; assert_ranges varies problem_attempts_per_completed only.
        self.input_record = ModuleEngagementSummaryRecord(
            course_id='foo/bar/baz',
            username='******',
            start_date=datetime.date(2014, 3, 25),
            end_date=datetime.date(2014, 4, 1),
            problem_attempts=1,
            problems_attempted=1,
            problems_completed=0,
            problem_attempts_per_completed=0.0,
            videos_viewed=0,
            discussion_contributions=0,
            days_active=1,
        )

    def assert_ranges(self, values, range_values):
        """Reduce one record per value and check the resulting metric ranges.

        ``values`` become the ``problem_attempts_per_completed`` of the generated
        records. ``range_values`` is a list of ``(range_type, low, high)`` tuples;
        every output record for that metric must match the tuple with its type.
        """
        serialized = []
        for value in values:
            summary = self.input_record.replace(problem_attempts_per_completed=value)
            serialized.append(summary.to_separated_values())

        reducer_output = self._get_reducer_output(serialized)

        expected_by_type = {}
        for entry in range_values:
            expected_by_type[entry[0]] = entry

        matched_any = False
        for row in reducer_output:
            # Only the ranges of the metric under test are inspected.
            if row[3] != 'problem_attempts_per_completed':
                continue

            range_type, low, high = expected_by_type[row[4]]
            expected_row = (
                'foo/bar/baz',
                '2014-03-25',
                '2014-04-01',
                'problem_attempts_per_completed',
                range_type,
                str(low),
                str(high),
            )
            self.assertEqual(row, expected_row)
            matched_any = True

        # With input present, at least one range for the metric must be emitted.
        if values and not matched_any:
            self.fail("No records for 'problem_attempts_per_completed' found! Output = {}, Records = {}".format(
                reducer_output, serialized
            ))

    def test_simple_distribution(self):
        # [4, 13, 13, 13] (3 records are <= 13, this accounts for 15% of the total 20 non-zero values)
        # [15] * 4 (throw in a bunch of data in the "normal" range)
        # [50] * 11 (round out the 20 records with some other arbitrary value, note that this will also contain two
        #   of the three highest values)
        # [154] (throw in an outlier - a very high maximum value, this will show the high end of the range, but the
        #   85th percentile should be at the 50 value)
        # values = [4] + ([13] * 3) + ([0] * 4) + ([15] * 4) + ([50] * 11) + [154]
        low_end = [4] + ([13] * 3)
        middle = [15] * 4
        high_end = ([50] * 11) + [154]
        expected_ranges = [
            ('low', 0, 13.0),
            ('normal', 13.0, 50.0),
            ('high', 50.0, 'inf'),
        ]
        self.assert_ranges(low_end + middle + high_end, expected_ranges)

    def test_identical_values(self):
        self.assert_ranges([5] * 6, [('low', 0, 5.0), ('normal', 5.0, 'inf')])

    def test_single_value(self):
        values = [1]
        self.assert_ranges(values, [('low', 0, 1.0), ('normal', 1.0, 'inf')])

    def test_single_infinite_value(self):
        # If num_problems_completed is zero, then problem_attempts_per_completed will be set to 'inf'.
        self.assert_ranges([float('inf')], [('low', 0, 'inf'), ('normal', 'inf', 'inf')])

    def test_multiple_infinite_values(self):
        self.assert_ranges([float('inf')] * 3, [('low', 0, 'inf'), ('normal', 'inf', 'inf')])

    def test_infinite_threshold_low_normal(self):
        infinity = float('inf')
        self.assert_ranges([1, infinity, infinity, infinity], [('low', 0, 'inf'), ('normal', 'inf', 'inf')])

    def test_infinite_threshold_normal_high(self):
        infinity = float('inf')
        expected_ranges = [('low', 0, 1.6), ('normal', 1.6, 'inf'), ('high', 'inf', 'inf')]
        self.assert_ranges([1, 2, 3, infinity, infinity], expected_ranges)

    def test_infinite_value_in_high(self):
        expected_ranges = [('low', 0, 2.05), ('normal', 2.05, 6.95), ('high', 6.95, 'inf')]
        self.assert_ranges([1, 2, 3, 4, 5, 6, 7, float('inf')], expected_ranges)

    def test_no_values(self):
        no_values = []
        self.assert_ranges(no_values, [('normal', 0, 'inf')])

    def test_single_zero_value(self):
        self.assert_ranges([0], [('normal', 0.0, 'inf')])

    def test_multiple_zero_values(self):
        self.assert_ranges([0] * 3, [('normal', 0.0, 'inf')])

    def test_zeroes_are_normal(self):
        self.assert_ranges([1, 0, 0, 0], [('normal', 0.0, 0.55), ('high', 0.55, 'inf')])

    def test_zeroes_are_low(self):
        expected_ranges = [('low', 0, 0.4), ('normal', 0.4, 2.0), ('high', 2.0, 'inf')]
        self.assert_ranges([0, 0, 0] + ([1] * 10) + ([2] * 4), expected_ranges)
class ModuleEngagementSummaryMetricRangesDataTaskReducerTest(
        ReducerTestMixin, TestCase):
    """Checks the metric ranges produced by the ModuleEngagementSummaryMetricRangesDataTask reducer."""

    task_class = ModuleEngagementSummaryMetricRangesDataTask
    output_record_type = ModuleEngagementSummaryMetricRangeRecord

    def setUp(self):
        super(ModuleEngagementSummaryMetricRangesDataTaskReducerTest,
              self).setUp()
        self.reduce_key = 'foo/bar/baz'
        # Base record; each test swaps in its own problem_attempts_per_completed.
        self.input_record = ModuleEngagementSummaryRecord(
            course_id='foo/bar/baz',
            username='******',
            start_date=datetime.date(2014, 3, 25),
            end_date=datetime.date(2014, 4, 1),
            problem_attempts=1,
            problems_attempted=1,
            problems_completed=0,
            problem_attempts_per_completed=0.0,
            videos_viewed=0,
            discussion_contributions=0,
            days_active=1,
        )

    def test_simple_distribution(self):
        # [4, 13, 13, 13] (3 records are <= 13, this accounts for 15% of the total 20 non-zero values)
        # [15] * 4 (throw in a bunch of data in the "normal" range)
        # [50] * 11 (round out the 20 records with some other arbitrary value, note that this will also contain two
        #   of the three highest values)
        # [154] (throw in an outlier - a very high maximum value, this will show the high end of the range, but the
        #   85th percentile should be at the 50 value)
        # values = [4] + ([13] * 3) + ([0] * 4) + ([15] * 4) + ([50] * 11) + [154]
        samples = [4]
        samples += [13] * 3
        samples += [15] * 4
        samples += [50] * 11
        samples += [154]
        self.assert_ranges(samples, [
            ('low', 0, 13.0),
            ('normal', 13.0, 50.0),
            ('high', 50.0, 'inf'),
        ])

    def assert_ranges(self, values, range_values):
        """Run the reducer over records built from ``values`` and verify its ranges.

        One summary record is generated per entry of ``values``, used as its
        ``problem_attempts_per_completed``. Each output record for that metric is
        compared with the ``(range_type, low, high)`` entry of ``range_values``
        that has the same range type.
        """
        records = []
        for sample in values:
            replaced = self.input_record.replace(
                problem_attempts_per_completed=sample)
            records.append(replaced.to_separated_values())

        output = self._get_reducer_output(records)
        expectations = dict((spec[0], spec) for spec in range_values)

        checked = 0
        for emitted in output:
            metric_name = emitted[3]
            if metric_name == 'problem_attempts_per_completed':
                range_type, low, high = expectations[emitted[4]]
                self.assertEqual(emitted, (
                    'foo/bar/baz',
                    '2014-03-25',
                    '2014-04-01',
                    'problem_attempts_per_completed',
                    range_type,
                    str(low),
                    str(high),
                ))
                checked += 1

        # Nothing to require when no input was given; otherwise the metric
        # must show up at least once in the output.
        if checked or not values:
            return
        self.fail(
            "No records for 'problem_attempts_per_completed' found! Output = {}, Records = {}"
            .format(output, records))

    def test_identical_values(self):
        expected = [('low', 0, 5.0), ('normal', 5.0, 'inf')]
        self.assert_ranges([5] * 6, expected)

    def test_single_value(self):
        expected = [('low', 0, 1.0), ('normal', 1.0, 'inf')]
        self.assert_ranges([1], expected)

    def test_single_infinite_value(self):
        # If num_problems_completed is zero, then problem_attempts_per_completed will be set to 'inf'.
        expected = [('low', 0, 'inf'), ('normal', 'inf', 'inf')]
        self.assert_ranges([float('inf')], expected)

    def test_multiple_infinite_values(self):
        expected = [('low', 0, 'inf'), ('normal', 'inf', 'inf')]
        self.assert_ranges([float('inf')] * 3, expected)

    def test_infinite_threshold_low_normal(self):
        expected = [('low', 0, 'inf'), ('normal', 'inf', 'inf')]
        self.assert_ranges([1] + [float('inf')] * 3, expected)

    def test_infinite_threshold_normal_high(self):
        expected = [
            ('low', 0, 1.6),
            ('normal', 1.6, 'inf'),
            ('high', 'inf', 'inf'),
        ]
        self.assert_ranges([1, 2, 3] + [float('inf')] * 2, expected)

    def test_infinite_value_in_high(self):
        expected = [
            ('low', 0, 2.05),
            ('normal', 2.05, 6.95),
            ('high', 6.95, 'inf'),
        ]
        self.assert_ranges([1, 2, 3, 4, 5, 6, 7] + [float('inf')], expected)

    def test_no_values(self):
        expected = [('normal', 0, 'inf')]
        self.assert_ranges([], expected)

    def test_single_zero_value(self):
        expected = [('normal', 0.0, 'inf')]
        self.assert_ranges([0], expected)

    def test_multiple_zero_values(self):
        expected = [('normal', 0.0, 'inf')]
        self.assert_ranges([0] * 3, expected)

    def test_zeroes_are_normal(self):
        expected = [('normal', 0.0, 0.55), ('high', 0.55, 'inf')]
        self.assert_ranges([1] + [0] * 3, expected)

    def test_zeroes_are_low(self):
        expected = [
            ('low', 0, 0.4),
            ('normal', 0.4, 2.0),
            ('high', 2.0, 'inf'),
        ]
        self.assert_ranges([0] * 3 + [1] * 10 + [2] * 4, expected)