# NOTE(review): stray fragment -- this setUp/test pair is duplicated again at
# several points below and inside AnswerDistributionPerCourseReduceTest, and
# the mixed 5-space/4-space indentation means the module cannot parse as
# pasted.  Confirm against version control before deduplicating.
def setUp(self):
     # Fixture state: fresh mixin instance, two ISO-8601 timestamps two days
     # apart (for ordering-sensitive tests), and a (course_id, answer_id)
     # reducer key.  initialize_ids() presumably sets course_id/answer_id --
     # defined elsewhere, confirm.
     self.initialize_ids()
     self.task = AnswerDistributionPerCourseMixin()
     self.timestamp = "2013-12-17T15:38:32.805444"
     self.earlier_timestamp = "2013-12-15T15:38:32.805444"
     self.key = (self.course_id, self.answer_id)
     self.problem_display_name = "This is the Problem for You!"
    def test_reduce_multiple_values(self):
        """Rows are CSV-formatted under a header and sorted on output.

        Two JSON samples differing only in their fifth column are fed to
        multi_output_reducer; the test asserts the header line is written
        first, then the lexically-smaller sample, then the larger one.
        """
        field_names = AnswerDistributionPerCourseMixin.get_column_order()

        # To test sorting, the first sample is made to sort after the
        # second sample.
        column_values_2 = [(k, unicode(k) + u'\u2603') for k in field_names]
        column_values_2[3] = (column_values_2[3][0], 10)
        column_values_1 = list(column_values_2)
        column_values_1[4] = (column_values_1[4][0], u'ZZZZZZZZZZZ')
        sample_input_1 = json.dumps(dict(column_values_1))
        sample_input_2 = json.dumps(dict(column_values_2))
        mock_output_file = Mock()

        self.task.multi_output_reducer('foo',
                                       iter([sample_input_1, sample_input_2]),
                                       mock_output_file)

        # Header row comes first, CRLF-terminated (csv writer convention).
        expected_header_string = ','.join(field_names) + '\r\n'
        self.assertEquals(mock_output_file.write.mock_calls[0],
                          call(expected_header_string))

        # Confirm that the second sample appears before the first.
        expected_row_1 = ','.join(
            unicode(v[1]).encode('utf8') for v in column_values_2) + '\r\n'
        self.assertEquals(mock_output_file.write.mock_calls[1],
                          call(expected_row_1))
        expected_row_2 = ','.join(
            unicode(v[1]).encode('utf8') for v in column_values_1) + '\r\n'
        self.assertEquals(mock_output_file.write.mock_calls[2],
                          call(expected_row_2))
 def setUp(self):
     # NOTE(review): duplicate of the setUp fragment above; the one-space
     # indent on the def is inconsistent with the rest of the file and will
     # not parse at module level.  Likely a merge/paste artifact -- confirm.
     self.initialize_ids()
     self.task = AnswerDistributionPerCourseMixin()
     self.timestamp = "2013-12-17T15:38:32.805444"
     self.earlier_timestamp = "2013-12-15T15:38:32.805444"
     self.key = (self.course_id, self.answer_id)
     self.problem_display_name = "This is the Problem for You!"
    def test_reduce_multiple_values(self):
        """Duplicate (reformatted) copy of the sort-order reducer test.

        Same assertions as the copy above: header first, then the sample
        that sorts lower, then the sample that sorts higher.
        """
        field_names = AnswerDistributionPerCourseMixin.get_column_order()

        # To test sorting, the first sample is made to sort after the
        # second sample.
        column_values_2 = [(k, unicode(k) + u'\u2603') for k in field_names]
        column_values_2[3] = (column_values_2[3][0], 10)
        column_values_1 = list(column_values_2)
        column_values_1[4] = (column_values_1[4][0], u'ZZZZZZZZZZZ')
        sample_input_1 = json.dumps(dict(column_values_1))
        sample_input_2 = json.dumps(dict(column_values_2))
        mock_output_file = Mock()

        self.task.multi_output_reducer('foo', iter([sample_input_1, sample_input_2]), mock_output_file)

        expected_header_string = ','.join(field_names) + '\r\n'
        self.assertEquals(mock_output_file.write.mock_calls[0], call(expected_header_string))

        # Confirm that the second sample appears before the first.
        expected_row_1 = ','.join(unicode(v[1]).encode('utf8') for v in column_values_2) + '\r\n'
        self.assertEquals(mock_output_file.write.mock_calls[1], call(expected_row_1))
        expected_row_2 = ','.join(unicode(v[1]).encode('utf8') for v in column_values_1) + '\r\n'
        self.assertEquals(mock_output_file.write.mock_calls[2], call(expected_row_2))
# NOTE(review): a second, reformatted copy of this class appears later in the
# file -- likely a merge/paste artifact.  Confirm and deduplicate.
class AnswerDistributionPerCourseReduceTest(InitializeOpaqueKeysMixin,
                                            unittest.TestCase):
    """
    Verify that AnswerDistributionPerCourseMixin.reduce() works correctly.

    Each test serializes one or more answer-data dicts as (timestamp, json)
    pairs, feeds them to the reducer under a fixed (course_id, answer_id)
    key, and compares the JSON rows the reducer emits with expected dicts.
    """
    def setUp(self):
        self.initialize_ids()
        self.task = AnswerDistributionPerCourseMixin()
        # Two timestamps, two days apart, for ordering-sensitive tests.
        self.timestamp = "2013-12-17T15:38:32.805444"
        self.earlier_timestamp = "2013-12-15T15:38:32.805444"
        self.key = (self.course_id, self.answer_id)
        self.problem_display_name = "This is the Problem for You!"

    def _get_reducer_output(self, values):
        """Run reducer with provided values and the hardcoded key."""
        return tuple(self.task.reducer(self.key, values))

    def _check_output(self, inputs, expected):
        """Compare generated with expected output.

        Asserts every emitted key is the fixture course_id and that the set
        of emitted JSON rows equals the set of expected dicts.
        """
        reducer_output = self._get_reducer_output(inputs)
        self.assertEqual(len(reducer_output), len(expected))
        for course_id, _output in reducer_output:
            self.assertEqual(course_id, self.course_id)
        # We don't know what order the outputs will be dumped for a given
        # set of input dicts, so we have to compare sets of items.
        reducer_outputs = set(
            frozenset(json.loads(output).items())
            for _, output in reducer_output
        )
        expected_outputs = set(
            frozenset(output.items()) for output in expected
        )
        self.assertEqual(reducer_outputs, expected_outputs)

    def _get_answer_data(self, **kwargs):
        """Returns answer data with submission information for input to reducer."""
        answer_data = {
            "answer": u"\u00b2",
            "problem_display_name": None,
            "variant": "",
            "correct": False,
            "problem_id": self.problem_id,
            "input_type": "formulaequationinput",
            "question": u"Enter the number(\u00ba) of fingers on a human hand",
            "response_type": "numericalresponse",
        }
        answer_data.update(**kwargs)
        return answer_data

    def _get_non_submission_answer_data(self, **kwargs):
        """Returns answer data without submission information for input to reducer."""
        answer_data = {
            "answer_value_id": u'\u00b2',
            "problem_display_name": None,
            "variant": "1",
            "correct": False,
            "problem_id": self.problem_id,
        }
        answer_data.update(**kwargs)
        return answer_data

    def _get_expected_output(self, answer_data, **kwargs):
        """Get an expected reducer output based on the input."""
        expected_output = {
            "Problem Display Name": answer_data.get('problem_display_name') or "",
            "Count": 1,
            "PartID": self.answer_id,
            "Question": answer_data.get('question') or "",
            "AnswerValue": answer_data.get('answer') or answer_data.get('answer_value_id') or "",
            "ValueID": "",
            "Variant": answer_data.get('variant') or "",
            "Correct Answer": "1" if answer_data['correct'] else '0',
            "ModuleID": self.problem_id,
        }
        expected_output.update(**kwargs)
        return expected_output

    def test_no_user_counts(self):
        """An empty input sequence produces no output rows."""
        self.assertEqual(self._get_reducer_output([]), tuple())

    def test_one_answer_event(self):
        """A single submission event yields one row with Count 1."""
        answer_data = self._get_answer_data()
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(answer_data)
        self._check_output([input_data], (expected_output, ))

    def test_event_with_variant(self):
        """A numeric variant value is carried through to the output row."""
        answer_data = self._get_answer_data(variant=629)
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(answer_data)
        self._check_output([input_data], (expected_output, ))

    def test_event_with_problem_name(self):
        """The problem display name is carried through to the output row."""
        answer_data = self._get_answer_data(
            problem_display_name=self.problem_display_name)
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(answer_data)
        self._check_output([input_data], (expected_output, ))

    def check_choice_answer(self, answer, expected):
        """Run a choice answer with a provided value, and compare with expected."""
        answer_data = self._get_answer_data(
            answer_value_id='choice_1',
            answer=answer,
        )
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(answer_data,
                                                    ValueID='choice_1',
                                                    AnswerValue=expected)
        self._check_output([input_data], (expected_output, ))

    def test_choice_answer(self):
        """A plain choice answer passes through unchanged."""
        self.check_choice_answer('First Choice', 'First Choice')

    def test_choice_answer_with_whitespace(self):
        """Trailing whitespace is stripped from the answer text."""
        self.check_choice_answer('First Choice\t', 'First Choice')

    def test_choice_answer_with_empty_string(self):
        """An empty answer maps to an empty AnswerValue."""
        self.check_choice_answer('', '')

    def test_choice_answer_with_empty_markup(self):
        """Wrapping markup tags are stripped, leaving the text content."""
        self.check_choice_answer('<text><span>First Choice</span></text>',
                                 'First Choice')

    def test_choice_answer_with_non_element_markup(self):
        """Comment nodes inside markup are ignored when extracting text."""
        # This tests a branch of the get_text_from_element logic,
        # where there is no tag on an element.
        self.check_choice_answer(
            '<text><span>First<!-- embedded comment --> Choice</span></text>',
            'First Choice')

    def test_choice_answer_with_html_markup(self):
        """HTML tags (even unclosed) are removed from the answer text."""
        self.check_choice_answer('<p>First<br>Choice', 'First Choice')

    def test_choice_answer_with_embedded_whitespace(self):
        """Runs of internal whitespace are collapsed to single spaces."""
        self.check_choice_answer('First  \t\n    Choice  ', 'First Choice')

    def test_choice_answer_with_bad_html_markup(self):
        """Text swallowed by a malformed opening tag is lost."""
        self.check_choice_answer('<p First <br>Choice', 'Choice')

    def test_choice_answer_with_bad2_html_markup(self):
        """Text that merely resembles markup is left as-is."""
        self.check_choice_answer('First br>Choice', 'First br>Choice')

    def test_choice_answer_with_cdata_html_markup(self):
        """CDATA sections are dropped from the answer text."""
        self.check_choice_answer(
            'First <![CDATA[This is to be ignored.]]>  Choice', 'First Choice')

    def test_multiple_choice_answer(self):
        """Multiple selections are rendered as [id1|id2|...] lists."""
        answer_data = self._get_answer_data(
            answer_value_id=['choice_1', 'choice_2', 'choice_4'],
            answer=[
                u'First Ch\u014dice', u'Second Ch\u014dice',
                u'Fourth Ch\u014dice'
            ],
            response_type="multiplechoiceresponse",
        )
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(
            answer_data,
            ValueID='[choice_1|choice_2|choice_4]',
            AnswerValue=u'[First Ch\u014dice|Second Ch\u014dice|Fourth Ch\u014dice]')
        self._check_output([input_data], (expected_output, ))

    def test_multiple_choice_answer_with_markup(self):
        """Markup inside each selection is stripped before joining."""
        answer_data = self._get_answer_data(
            answer_value_id=['choice_1', 'choice_2', 'choice_4'],
            answer=[
                u'<text>First Ch\u014dice</text>',
                u'Second <sup>Ch\u014dice</sup>',
                u'Fourth <table><tbody><tr><td>Ch\u014dice</td></tr></tbody></table> goes here.'
            ],
            response_type="multiplechoiceresponse",
        )
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(
            answer_data,
            ValueID='[choice_1|choice_2|choice_4]',
            AnswerValue=u'[First Ch\u014dice|Second Ch\u014dice|Fourth Ch\u014dice goes here.]')
        self._check_output([input_data], (expected_output, ))

    def test_filtered_response_type(self):
        """An unrecognized response_type produces no output at all."""
        answer_data = self._get_answer_data(response_type="nonsenseresponse")
        input_data = (self.timestamp, json.dumps(answer_data))
        self.assertEqual(self._get_reducer_output([input_data]), tuple())

    @with_luigi_config('answer-distribution', 'valid_response_types',
                       OPTION_REMOVED)
    def test_filtered_response_type_default(self):
        """With the whitelist option removed, nonsense types still drop."""
        answer_data = self._get_answer_data(response_type="nonsenseresponse")
        input_data = (self.timestamp, json.dumps(answer_data))
        self.assertEqual(self._get_reducer_output([input_data]), tuple())

    @with_luigi_config('answer-distribution', 'valid_response_types',
                       OPTION_REMOVED)
    def test_valid_response_type_default(self):
        """With the whitelist option removed, multiplechoiceresponse is kept."""
        answer_data = self._get_answer_data(
            response_type="multiplechoiceresponse")
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(answer_data)
        self._check_output([input_data], (expected_output, ))

    @with_luigi_config('answer-distribution', 'valid_response_types',
                       'multiplechoiceresponse,numericalresponse')
    def test_filtered_response_type_with_config(self):
        """A type outside the configured whitelist produces no output."""
        answer_data = self._get_answer_data(response_type="nonsenseresponse")
        input_data = (self.timestamp, json.dumps(answer_data))
        self.assertEqual(self._get_reducer_output([input_data]), tuple())

    @with_luigi_config('answer-distribution', 'valid_response_types',
                       'multiplechoiceresponse,numericalresponse')
    def test_valid_response_type_with_config(self):
        """A type inside the configured whitelist is kept."""
        answer_data = self._get_answer_data(
            response_type="multiplechoiceresponse")
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(answer_data)
        self._check_output([input_data], (expected_output, ))

    def test_filtered_non_submission_answer(self):
        """A lone non-submission event produces no output."""
        answer_data = self._get_non_submission_answer_data()
        input_data = (self.timestamp, json.dumps(answer_data))
        self.assertEqual(self._get_reducer_output([input_data]), tuple())

    def test_two_answer_event_same(self):
        """Two identical events collapse into one row with Count 2."""
        answer_data = self._get_answer_data()
        input_data_1 = (self.earlier_timestamp, json.dumps(answer_data))
        input_data_2 = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(answer_data, Count=2)
        self._check_output([input_data_1, input_data_2], (expected_output, ))

    def test_two_answer_event_same_reversed(self):
        """Input ordering does not affect the collapsed count."""
        answer_data = self._get_answer_data()
        input_data_1 = (self.earlier_timestamp, json.dumps(answer_data))
        input_data_2 = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(answer_data, Count=2)
        self._check_output([input_data_2, input_data_1], (expected_output, ))

    def test_two_answer_event_same_old_and_new(self):
        """An old-format event is merged with a matching new-format event."""
        answer_data_1 = self._get_non_submission_answer_data()
        answer_data_2 = self._get_answer_data()
        input_data_1 = (self.earlier_timestamp, json.dumps(answer_data_1))
        input_data_2 = (self.timestamp, json.dumps(answer_data_2))
        expected_output = self._get_expected_output(answer_data_2, Count=2)
        self._check_output([input_data_1, input_data_2], (expected_output, ))

    def test_same_old_and_new_with_variant(self):
        """Old/new events with the same variant are merged."""
        answer_data_1 = self._get_non_submission_answer_data(variant=123)
        answer_data_2 = self._get_answer_data(variant=123)
        input_data_1 = (self.earlier_timestamp, json.dumps(answer_data_1))
        input_data_2 = (self.timestamp, json.dumps(answer_data_2))
        expected_output = self._get_expected_output(answer_data_2, Count=2)
        self._check_output([input_data_1, input_data_2], (expected_output, ))

    def test_two_answer_event_different_answer(self):
        """Distinct answers produce one row each."""
        answer_data_1 = self._get_answer_data(answer="first")
        answer_data_2 = self._get_answer_data(answer="second")
        input_data_1 = (self.earlier_timestamp, json.dumps(answer_data_1))
        input_data_2 = (self.timestamp, json.dumps(answer_data_2))
        expected_output_1 = self._get_expected_output(answer_data_1)
        expected_output_2 = self._get_expected_output(answer_data_2)
        self._check_output([input_data_1, input_data_2],
                           (expected_output_1, expected_output_2))

    def test_two_answer_event_different_answer_by_whitespace(self):
        """Answers differing only in whitespace are merged into one row."""
        answer_data_1 = self._get_answer_data(answer="\t\n\nfirst   ")
        answer_data_2 = self._get_answer_data(answer="first")
        input_data_1 = (self.earlier_timestamp, json.dumps(answer_data_1))
        input_data_2 = (self.timestamp, json.dumps(answer_data_2))
        expected_output = self._get_expected_output(answer_data_2, Count=2)
        self._check_output([input_data_1, input_data_2], (expected_output, ))

    def test_two_answer_event_different_old_and_new(self):
        """Differing old/new events each get a row; display name is shared."""
        answer_data_1 = self._get_non_submission_answer_data(
            answer_value_id="first")
        answer_data_2 = self._get_answer_data(
            problem_display_name=self.problem_display_name)
        input_data_1 = (self.earlier_timestamp, json.dumps(answer_data_1))
        input_data_2 = (self.timestamp, json.dumps(answer_data_2))
        expected_output_2 = self._get_expected_output(answer_data_2)
        # An older non-submission-based event should not inherit
        # information from a newer submission-based event.
        expected_output_1 = self._get_expected_output(answer_data_1)
        expected_output_1['Problem Display Name'] = expected_output_2[
            'Problem Display Name']
        self._check_output([input_data_1, input_data_2],
                           (expected_output_1, expected_output_2))

    def test_two_answer_event_different_variant(self):
        """Different variants produce one row each."""
        answer_data_1 = self._get_answer_data(variant=123)
        answer_data_2 = self._get_answer_data(variant=456)
        input_data_1 = (self.earlier_timestamp, json.dumps(answer_data_1))
        input_data_2 = (self.timestamp, json.dumps(answer_data_2))
        expected_output_1 = self._get_expected_output(answer_data_1)
        expected_output_2 = self._get_expected_output(answer_data_2)
        self._check_output([input_data_1, input_data_2],
                           (expected_output_1, expected_output_2))

    def test_two_answer_event_different_variant_empty_new(self):
        """An empty variant does not merge with an earlier non-empty one."""
        answer_data_1 = self._get_answer_data(variant=123)
        answer_data_2 = self._get_answer_data(variant='')
        input_data_1 = (self.earlier_timestamp, json.dumps(answer_data_1))
        input_data_2 = (self.timestamp, json.dumps(answer_data_2))
        expected_output_1 = self._get_expected_output(answer_data_1)
        expected_output_2 = self._get_expected_output(answer_data_2)
        self._check_output([input_data_1, input_data_2],
                           (expected_output_1, expected_output_2))

    def test_problem_type_changed_to_multi_choice(self):
        """Events before and after a type change each get their own row."""
        answer_data_1 = self._get_answer_data(
            answer=u'First Ch\u014dice',
            response_type='optionresponse',
        )
        answer_data_2 = self._get_answer_data(
            answer_value_id=['choice_1', 'choice_2', 'choice_4'],
            answer=[
                u'First Ch\u014dice', u'Second Ch\u014dice',
                u'Fourth Ch\u014dice'
            ],
            response_type="multiplechoiceresponse",
        )
        input_data_1 = (self.earlier_timestamp, json.dumps(answer_data_1))
        input_data_2 = (self.timestamp, json.dumps(answer_data_2))
        expected_output_1 = self._get_expected_output(answer_data_1)
        expected_output_2 = self._get_expected_output(
            answer_data_2,
            ValueID='[choice_1|choice_2|choice_4]',
            AnswerValue=u'[First Ch\u014dice|Second Ch\u014dice|Fourth Ch\u014dice]')
        self._check_output([input_data_1, input_data_2],
                           (expected_output_1, expected_output_2))

    def _load_metadata(self, **kwargs):
        """Defines some metadata for test answer and loads it into the task."""
        metadata_dict = {
            self.answer_id: {
                "question": u"Pick One or \u00b2",
                "response_type": "multiplechoiceresponse",
                "input_type": "my_input_type",
                "problem_display_name": self.problem_display_name,
            }
        }
        metadata_dict[self.answer_id].update(**kwargs)
        answer_metadata = StringIO.StringIO(json.dumps(metadata_dict))
        self.task.load_answer_metadata(answer_metadata)

    def test_non_submission_choice_with_metadata(self):
        """Metadata maps an old-format choice id to its display text."""
        self._load_metadata(answer_value_id_map={
            "choice_1": u"First Ch\u014dice",
            "choice_2": u"Second Ch\u014dice"
        })
        answer_data = self._get_non_submission_answer_data(
            answer_value_id='choice_1')
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(
            answer_data,
            ValueID='choice_1',
            AnswerValue=u'First Ch\u014dice',
            Question=u"Pick One or \u00b2",
        )
        expected_output["Problem Display Name"] = self.problem_display_name
        self._check_output([input_data], (expected_output, ))

    def test_non_submission_multichoice_with_metadata(self):
        """Metadata maps each id in an old-format multi-choice answer."""
        self._load_metadata(answer_value_id_map={
            "choice_1": "First Choice",
            "choice_2": "Second Choice"
        })
        answer_data = self._get_non_submission_answer_data(
            answer_value_id=['choice_1', 'choice_2'])
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(
            answer_data,
            ValueID='[choice_1|choice_2]',
            AnswerValue='[First Choice|Second Choice]',
            Question=u"Pick One or \u00b2",
        )
        expected_output["Problem Display Name"] = self.problem_display_name

        self._check_output([input_data], (expected_output, ))

    def test_non_submission_nonmapped_multichoice_with_metadata(self):
        """Without an id map, a multi-choice answer gets an empty value."""
        self._load_metadata()
        answer_data = self._get_non_submission_answer_data(
            answer_value_id=['choice_1', 'choice_2'])
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(
            answer_data,
            ValueID='[choice_1|choice_2]',
            AnswerValue='',
            Question=u"Pick One or \u00b2",
        )
        expected_output["Problem Display Name"] = self.problem_display_name
        self._check_output([input_data], (expected_output, ))

    def test_non_submission_nonmapped_choice_with_metadata(self):
        """Without an id map, a single choice answer gets an empty value."""
        self._load_metadata()
        answer_data = self._get_non_submission_answer_data(
            answer_value_id='choice_1')
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(
            answer_data,
            ValueID='choice_1',
            AnswerValue='',
            Question=u"Pick One or \u00b2",
        )
        expected_output["Problem Display Name"] = self.problem_display_name
        self._check_output([input_data], (expected_output, ))

    def test_non_submission_nonmapped_nonchoice_with_metadata(self):
        """A non-choice response type keeps its literal answer value."""
        self._load_metadata(response_type="optionresponse")
        answer_data = self._get_non_submission_answer_data()
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(
            answer_data,
            AnswerValue=u'\u00b2',
            Question=u"Pick One or \u00b2",
        )
        expected_output["Problem Display Name"] = self.problem_display_name
        self._check_output([input_data], (expected_output, ))
class AnswerDistributionPerCourseReduceTest(InitializeOpaqueKeysMixin, unittest.TestCase):
    """
    Verify that AnswerDistributionPerCourseMixin.reduce() works correctly.
    """
    def setUp(self):
        """Build a fresh mixin instance plus canned ids and timestamps."""
        self.initialize_ids()
        self.task = AnswerDistributionPerCourseMixin()
        # Two timestamps, two days apart, for ordering-sensitive tests.
        self.earlier_timestamp = "2013-12-15T15:38:32.805444"
        self.timestamp = "2013-12-17T15:38:32.805444"
        self.key = (self.course_id, self.answer_id)
        self.problem_display_name = "This is the Problem for You!"

    def _get_reducer_output(self, values):
        """Materialize the reducer output for the fixture key as a tuple."""
        rows = self.task.reducer(self.key, values)
        return tuple(rows)

    def _check_output(self, inputs, expected):
        """Compare generated with expected output.

        Asserts every emitted key is the fixture course_id and that the set
        of emitted JSON rows equals the set of expected dicts.
        """
        reducer_output = self._get_reducer_output(inputs)
        self.assertEqual(len(reducer_output), len(expected))
        for course_id, _output in reducer_output:
            self.assertEqual(course_id, self.course_id)
        # We don't know what order the outputs will be dumped for a given
        # set of input dicts, so we have to compare sets of items.
        reducer_outputs = set(
            frozenset(json.loads(output).items()) for _, output in reducer_output
        )
        expected_outputs = set(frozenset(output.items()) for output in expected)
        self.assertEqual(reducer_outputs, expected_outputs)

    def _get_answer_data(self, **kwargs):
        """Return submission-style answer data for reducer input."""
        data = {
            "answer": u"\u00b2",
            "problem_display_name": None,
            "variant": "",
            "correct": False,
            "problem_id": self.problem_id,
            "input_type": "formulaequationinput",
            "question": u"Enter the number(\u00ba) of fingers on a human hand",
            "response_type": "numericalresponse",
            "attempt_category": 'last',
        }
        # Caller-supplied fields override the defaults above.
        data.update(kwargs)
        return data

    def _get_non_submission_answer_data(self, **kwargs):
        """Return old-format (non-submission) answer data for reducer input."""
        data = {
            "answer_value_id": u'\u00b2',
            "problem_display_name": None,
            "variant": "1",
            "correct": False,
            "problem_id": self.problem_id,
            "attempt_category": 'last',
        }
        # Caller-supplied fields override the defaults above.
        data.update(kwargs)
        return data

    def _get_expected_output(self, answer_data, **kwargs):
        """Build the reducer row expected for the given answer data."""
        # An event is counted as "first" or "last" according to its
        # attempt_category; exactly one of the two counters is 1.
        is_first = answer_data.get('attempt_category', 'first') == 'first'
        row = {
            "Problem Display Name": answer_data.get('problem_display_name') or "",
            "First Response Count": int(is_first),
            "Last Response Count": int(not is_first),
            "PartID": self.answer_id,
            "Question": answer_data.get('question') or "",
            "AnswerValue": answer_data.get('answer') or answer_data.get('answer_value_id') or "",
            "ValueID": "",
            "Variant": answer_data.get('variant') or "",
            "Correct Answer": "1" if answer_data['correct'] else '0',
            "ModuleID": self.problem_id,
        }
        row.update(kwargs)
        return row

    def test_no_user_counts(self):
        """An empty input sequence produces no output rows."""
        self.assertEqual(self._get_reducer_output([]), tuple())

    def test_one_answer_event(self):
        """A single event produces exactly one expected row."""
        answer_data = self._get_answer_data()
        event = (self.timestamp, json.dumps(answer_data))
        expected = self._get_expected_output(answer_data)
        self._check_output([event], (expected,))

    def test_event_with_variant(self):
        """A numeric variant value is carried through to the output row."""
        answer_data = self._get_answer_data(variant=629)
        event = (self.timestamp, json.dumps(answer_data))
        expected = self._get_expected_output(answer_data)
        self._check_output([event], (expected,))

    def test_event_with_problem_name(self):
        """The problem display name is carried through to the output row."""
        answer_data = self._get_answer_data(
            problem_display_name=self.problem_display_name)
        event = (self.timestamp, json.dumps(answer_data))
        self._check_output([event], (self._get_expected_output(answer_data),))

    def check_choice_answer(self, answer, expected):
        """Run a choice answer with a provided value, and compare with expected."""
        answer_data = self._get_answer_data(answer_value_id='choice_1', answer=answer)
        event = (self.timestamp, json.dumps(answer_data))
        expected_row = self._get_expected_output(
            answer_data, ValueID='choice_1', AnswerValue=expected)
        self._check_output([event], (expected_row,))

    def test_choice_answer(self):
        """A plain choice answer passes through unchanged."""
        self.check_choice_answer('First Choice', 'First Choice')

    def test_choice_answer_with_whitespace(self):
        """Trailing whitespace is stripped from the answer text."""
        self.check_choice_answer('First Choice\t', 'First Choice')

    def test_choice_answer_with_empty_string(self):
        """An empty answer maps to an empty AnswerValue."""
        self.check_choice_answer('', '')

    def test_choice_answer_with_empty_markup(self):
        """Wrapping markup tags are stripped, leaving the text content."""
        self.check_choice_answer('<text><span>First Choice</span></text>', 'First Choice')

    def test_choice_answer_with_non_element_markup(self):
        """Comment nodes inside markup are ignored when extracting text."""
        # This tests a branch of the get_text_from_element logic,
        # where there is no tag on an element.
        self.check_choice_answer(
            '<text><span>First<!-- embedded comment --> Choice</span></text>',
            'First Choice'
        )

    def test_choice_answer_with_html_markup(self):
        """HTML tags (even unclosed) are removed from the answer text."""
        self.check_choice_answer('<p>First<br>Choice', 'First Choice')

    def test_choice_answer_with_embedded_whitespace(self):
        """Runs of internal whitespace are collapsed to single spaces."""
        self.check_choice_answer('First  \t\n    Choice  ', 'First Choice')

    def test_choice_answer_with_bad_html_markup(self):
        """Text swallowed by a malformed opening tag is lost."""
        self.check_choice_answer('<p First <br>Choice', 'Choice')

    def test_choice_answer_with_bad2_html_markup(self):
        """Text that merely resembles markup is left as-is."""
        self.check_choice_answer('First br>Choice', 'First br>Choice')

    def test_choice_answer_with_cdata_html_markup(self):
        """CDATA sections are dropped from the answer text."""
        self.check_choice_answer('First <![CDATA[This is to be ignored.]]>  Choice', 'First Choice')

    def test_multiple_choice_answer(self):
        """Multiple selections are rendered as [id1|id2|...] lists."""
        answer_data = self._get_answer_data(
            answer_value_id=['choice_1', 'choice_2', 'choice_4'],
            answer=[u'First Ch\u014dice', u'Second Ch\u014dice', u'Fourth Ch\u014dice'],
            response_type="multiplechoiceresponse",
        )
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(
            answer_data,
            ValueID='[choice_1|choice_2|choice_4]',
            AnswerValue=u'[First Ch\u014dice|Second Ch\u014dice|Fourth Ch\u014dice]'
        )
        self._check_output([input_data], (expected_output,))

    def test_multiple_choice_answer_with_markup(self):
        """Markup inside each selection is stripped before joining."""
        answer_data = self._get_answer_data(
            answer_value_id=['choice_1', 'choice_2', 'choice_4'],
            answer=[
                u'<text>First Ch\u014dice</text>',
                u'Second <sup>Ch\u014dice</sup>',
                u'Fourth <table><tbody><tr><td>Ch\u014dice</td></tr></tbody></table> goes here.'
            ],
            response_type="multiplechoiceresponse",
        )
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(
            answer_data,
            ValueID='[choice_1|choice_2|choice_4]',
            AnswerValue=u'[First Ch\u014dice|Second Ch\u014dice|Fourth Ch\u014dice goes here.]'
        )
        self._check_output([input_data], (expected_output,))

    def test_filtered_response_type(self):
        """An unrecognized response_type produces no output at all."""
        answer_data = self._get_answer_data(
            response_type="nonsenseresponse",
        )
        input_data = (self.timestamp, json.dumps(answer_data))
        self.assertEqual(self._get_reducer_output([input_data]), tuple())

    @with_luigi_config('answer-distribution', 'valid_response_types', OPTION_REMOVED)
    def test_filtered_response_type_default(self):
        """With the whitelist option removed, nonsense types still drop."""
        answer_data = self._get_answer_data(
            response_type="nonsenseresponse",
        )
        input_data = (self.timestamp, json.dumps(answer_data))
        self.assertEqual(self._get_reducer_output([input_data]), tuple())

    @with_luigi_config('answer-distribution', 'valid_response_types', OPTION_REMOVED)
    def test_valid_response_type_default(self):
        """With the whitelist option removed, multiplechoiceresponse is kept."""
        answer_data = self._get_answer_data(
            response_type="multiplechoiceresponse",
        )
        input_data = (self.timestamp, json.dumps(answer_data))
        expected_output = self._get_expected_output(answer_data)
        self._check_output([input_data], (expected_output,))

    @with_luigi_config('answer-distribution', 'valid_response_types', 'multiplechoiceresponse,numericalresponse')
    def test_filtered_response_type_with_config(self):
        """A response type absent from the configured whitelist yields no output."""
        answer_data = self._get_answer_data(
            response_type="nonsenseresponse",
        )
        input_data = (self.timestamp, json.dumps(answer_data))
        # Use assertEqual: assertEquals is a deprecated unittest alias.
        self.assertEqual(self._get_reducer_output([input_data]), tuple())

    @with_luigi_config('answer-distribution', 'valid_response_types', 'multiplechoiceresponse,numericalresponse')
    def test_valid_response_type_with_config(self):
        """A response type on the configured whitelist passes through."""
        answer_data = self._get_answer_data(
            response_type="multiplechoiceresponse",
        )
        expected = self._get_expected_output(answer_data)
        self._check_output([(self.timestamp, json.dumps(answer_data))], (expected,))

    def test_filtered_non_submission_answer(self):
        """A legacy (non-submission) event with no usable answer yields no output."""
        answer_data = self._get_non_submission_answer_data()
        input_data = (self.timestamp, json.dumps(answer_data))
        # Use assertEqual: assertEquals is a deprecated unittest alias.
        self.assertEqual(self._get_reducer_output([input_data]), tuple())

    def test_two_answer_event_same(self):
        """Identical answers at two timestamps collapse into one row counting 2."""
        shared_answer = self._get_answer_data()
        serialized = json.dumps(shared_answer)
        events = [
            (self.earlier_timestamp, serialized),
            (self.timestamp, serialized),
        ]
        expected = self._get_expected_output(shared_answer, **{'Last Response Count': 2})
        self._check_output(events, (expected,))

    def test_two_answer_event_same_reversed(self):
        """Event arrival order does not matter: later-first still counts 2."""
        shared_answer = self._get_answer_data()
        serialized = json.dumps(shared_answer)
        newer = (self.timestamp, serialized)
        older = (self.earlier_timestamp, serialized)
        expected = self._get_expected_output(shared_answer, **{'Last Response Count': 2})
        self._check_output([newer, older], (expected,))

    def test_two_answer_event_same_old_and_new(self):
        """An older legacy event and a newer submission event merge into one row."""
        legacy_answer = self._get_non_submission_answer_data()
        submission_answer = self._get_answer_data()
        events = [
            (self.earlier_timestamp, json.dumps(legacy_answer)),
            (self.timestamp, json.dumps(submission_answer)),
        ]
        expected = self._get_expected_output(submission_answer, **{'Last Response Count': 2})
        self._check_output(events, (expected,))

    def test_same_old_and_new_with_variant(self):
        """Legacy and submission events sharing a randomization variant merge."""
        legacy_answer = self._get_non_submission_answer_data(variant=123)
        submission_answer = self._get_answer_data(variant=123)
        events = [
            (self.earlier_timestamp, json.dumps(legacy_answer)),
            (self.timestamp, json.dumps(submission_answer)),
        ]
        expected = self._get_expected_output(submission_answer, **{'Last Response Count': 2})
        self._check_output(events, (expected,))

    def test_two_answer_event_different_answer(self):
        """Distinct answers from the same user produce one output row apiece."""
        earlier_answer = self._get_answer_data(answer="first")
        later_answer = self._get_answer_data(answer="second")
        events = [
            (self.earlier_timestamp, json.dumps(earlier_answer)),
            (self.timestamp, json.dumps(later_answer)),
        ]
        expected = (
            self._get_expected_output(earlier_answer),
            self._get_expected_output(later_answer),
        )
        self._check_output(events, expected)

    def test_two_answer_event_different_answer_by_whitespace(self):
        """Answers differing only in surrounding whitespace are treated as equal."""
        padded_answer = self._get_answer_data(answer="\t\n\nfirst   ")
        trimmed_answer = self._get_answer_data(answer="first")
        events = [
            (self.earlier_timestamp, json.dumps(padded_answer)),
            (self.timestamp, json.dumps(trimmed_answer)),
        ]
        expected = self._get_expected_output(trimmed_answer, **{'Last Response Count': 2})
        self._check_output(events, (expected,))

    def test_two_answer_event_different_old_and_new(self):
        """An older legacy event and a newer submission event with different
        answers stay as two separate output rows."""
        answer_data_1 = self._get_non_submission_answer_data(answer_value_id="first")
        answer_data_2 = self._get_answer_data(problem_display_name=self.problem_display_name)
        input_data_1 = (self.earlier_timestamp, json.dumps(answer_data_1))
        input_data_2 = (self.timestamp, json.dumps(answer_data_2))
        expected_output_2 = self._get_expected_output(answer_data_2)
        # An older non-submission-based event should not inherit
        # answer-level information from a newer submission-based event.
        expected_output_1 = self._get_expected_output(answer_data_1)
        # The display name is the one exception — presumably because it is
        # problem-level rather than answer-level metadata, the older row
        # carries the name from the newer event. TODO(review): confirm intent.
        expected_output_1['Problem Display Name'] = expected_output_2['Problem Display Name']
        self._check_output([input_data_1, input_data_2], (expected_output_1, expected_output_2))

    def test_two_answer_event_different_variant(self):
        """Answers from different randomization variants each get their own row."""
        variant_123 = self._get_answer_data(variant=123)
        variant_456 = self._get_answer_data(variant=456)
        events = [
            (self.earlier_timestamp, json.dumps(variant_123)),
            (self.timestamp, json.dumps(variant_456)),
        ]
        expected = (
            self._get_expected_output(variant_123),
            self._get_expected_output(variant_456),
        )
        self._check_output(events, expected)

    def test_two_answer_event_different_variant_empty_new(self):
        """An empty variant on the newer event is still distinct from a set one."""
        variant_123 = self._get_answer_data(variant=123)
        variant_empty = self._get_answer_data(variant='')
        events = [
            (self.earlier_timestamp, json.dumps(variant_123)),
            (self.timestamp, json.dumps(variant_empty)),
        ]
        expected = (
            self._get_expected_output(variant_123),
            self._get_expected_output(variant_empty),
        )
        self._check_output(events, expected)

    def test_problem_type_changed_to_multi_choice(self):
        """A problem re-authored from option to multiplechoice yields one row
        per response type, the multiplechoice one with bracketed values."""
        option_answer = self._get_answer_data(
            answer=u'First Ch\u014dice',
            response_type='optionresponse',
        )
        multi_choice_answer = self._get_answer_data(
            answer_value_id=['choice_1', 'choice_2', 'choice_4'],
            answer=[u'First Ch\u014dice', u'Second Ch\u014dice', u'Fourth Ch\u014dice'],
            response_type="multiplechoiceresponse",
        )
        events = [
            (self.earlier_timestamp, json.dumps(option_answer)),
            (self.timestamp, json.dumps(multi_choice_answer)),
        ]
        expected_option = self._get_expected_output(option_answer)
        expected_multi = self._get_expected_output(
            multi_choice_answer,
            ValueID='[choice_1|choice_2|choice_4]',
            AnswerValue=u'[First Ch\u014dice|Second Ch\u014dice|Fourth Ch\u014dice]'
        )
        self._check_output(events, (expected_option, expected_multi))

    def _load_metadata(self, **kwargs):
        """Install answer metadata for self.answer_id into the task under test.

        Keyword arguments override or extend the default metadata entry.
        """
        entry = {
            "question": u"Pick One or \u00b2",
            "response_type": "multiplechoiceresponse",
            "input_type": "my_input_type",
            "problem_display_name": self.problem_display_name,
        }
        entry.update(**kwargs)
        metadata_file = StringIO.StringIO(json.dumps({self.answer_id: entry}))
        self.task.load_answer_metadata(metadata_file)

    def test_non_submission_choice_with_metadata(self):
        """Metadata maps a legacy event's single choice id to its display text."""
        self._load_metadata(
            answer_value_id_map={"choice_1": u"First Ch\u014dice", "choice_2": u"Second Ch\u014dice"}
        )
        answer_data = self._get_non_submission_answer_data(
            answer_value_id='choice_1',
        )
        expected_output = self._get_expected_output(
            answer_data,
            ValueID='choice_1',
            AnswerValue=u'First Ch\u014dice',
            Question=u"Pick One or \u00b2",
        )
        expected_output["Problem Display Name"] = self.problem_display_name
        self._check_output([(self.timestamp, json.dumps(answer_data))], (expected_output,))

    def test_non_submission_multichoice_with_metadata(self):
        """Metadata maps each choice id of a legacy multichoice to display text."""
        self._load_metadata(
            answer_value_id_map={"choice_1": "First Choice", "choice_2": "Second Choice"}
        )
        answer_data = self._get_non_submission_answer_data(
            answer_value_id=['choice_1', 'choice_2']
        )
        expected_output = self._get_expected_output(
            answer_data,
            ValueID='[choice_1|choice_2]',
            AnswerValue='[First Choice|Second Choice]',
            Question=u"Pick One or \u00b2",
        )
        expected_output["Problem Display Name"] = self.problem_display_name
        self._check_output([(self.timestamp, json.dumps(answer_data))], (expected_output,))

    def test_non_submission_nonmapped_multichoice_with_metadata(self):
        """Without a value-id map, multichoice answer text comes out empty."""
        self._load_metadata()
        answer_data = self._get_non_submission_answer_data(
            answer_value_id=['choice_1', 'choice_2']
        )
        expected_output = self._get_expected_output(
            answer_data,
            ValueID='[choice_1|choice_2]',
            AnswerValue='',
            Question=u"Pick One or \u00b2",
        )
        expected_output["Problem Display Name"] = self.problem_display_name
        self._check_output([(self.timestamp, json.dumps(answer_data))], (expected_output,))

    def test_non_submission_nonmapped_choice_with_metadata(self):
        """Without a value-id map, a single-choice answer text comes out empty."""
        self._load_metadata()
        answer_data = self._get_non_submission_answer_data(
            answer_value_id='choice_1'
        )
        expected_output = self._get_expected_output(
            answer_data,
            ValueID='choice_1',
            AnswerValue='',
            Question=u"Pick One or \u00b2",
        )
        expected_output["Problem Display Name"] = self.problem_display_name
        self._check_output([(self.timestamp, json.dumps(answer_data))], (expected_output,))

    def test_non_submission_nonmapped_nonchoice_with_metadata(self):
        """A non-choice response type takes the raw answer value, no ValueID."""
        self._load_metadata(response_type="optionresponse")
        answer_data = self._get_non_submission_answer_data()
        expected_output = self._get_expected_output(
            answer_data,
            AnswerValue=u'\u00b2',
            Question=u"Pick One or \u00b2",
        )
        expected_output["Problem Display Name"] = self.problem_display_name
        self._check_output([(self.timestamp, json.dumps(answer_data))], (expected_output,))