def test_grade_essay_all_feedback_only_criteria(self):
    # Modify the rubric to include only feedback-only criteria
    rubric = copy.deepcopy(RUBRIC)
    for criterion in rubric['criteria']:
        criterion['options'] = []

    # Train classifiers for the rubric
    train_classifiers(rubric, {})

    # Schedule a grading task and retrieve the assessment
    ai_api.on_init(self.submission_uuid, rubric=rubric, algorithm_id=ALGORITHM_ID)
    assessment = ai_api.get_latest_assessment(self.submission_uuid)

    # Verify that all assessment parts have feedback set to an empty string
    for part in assessment['parts']:
        self.assertEqual(part['feedback'], u"")

    # Check the scores by criterion dict
    # Since none of the criteria had options, the scores should all default to 0
    score_dict = ai_api.get_assessment_scores_by_criteria(self.submission_uuid)
    self.assertItemsEqual(score_dict, {
        u"vøȼȺƀᵾłȺɍɏ": 0,
        u"ﻭɼค๓๓คɼ": 0,
    })
def test_grade_essay_feedback_only_criterion(self):
    # Modify the rubric to include a feedback-only criterion
    # (a criterion with no options, just written feedback)
    rubric = copy.deepcopy(RUBRIC)
    rubric['criteria'].append({
        'name': 'feedback only',
        'prompt': 'feedback',
        'options': []
    })

    # Train classifiers for the rubric
    train_classifiers(rubric, self.CLASSIFIER_SCORE_OVERRIDES)

    # Schedule a grading task and retrieve the assessment
    ai_api.on_init(self.submission_uuid, rubric=rubric, algorithm_id=ALGORITHM_ID)
    assessment = ai_api.get_latest_assessment(self.submission_uuid)

    # Verify that the criteria with options were given scores
    # (from the score override used by our fake classifiers)
    self.assertEqual(assessment['parts'][0]['criterion']['name'], u"vøȼȺƀᵾłȺɍɏ")
    self.assertEqual(assessment['parts'][0]['option']['points'], 1)
    self.assertEqual(assessment['parts'][1]['criterion']['name'], u"ﻭɼค๓๓คɼ")
    self.assertEqual(assessment['parts'][1]['option']['points'], 2)

    # Verify that the criterion with no options (only feedback)
    # has no score and empty feedback
    self.assertEqual(assessment['parts'][2]['criterion']['name'], u"feedback only")
    self.assertIs(assessment['parts'][2]['option'], None)
    self.assertEqual(assessment['parts'][2]['feedback'], u"")

    # Check the scores by criterion dict
    score_dict = ai_api.get_assessment_scores_by_criteria(self.submission_uuid)
    self.assertEqual(score_dict[u"vøȼȺƀᵾłȺɍɏ"], 1)
    self.assertEqual(score_dict[u"ﻭɼค๓๓คɼ"], 2)
    self.assertEqual(score_dict['feedback only'], 0)
def test_no_score(self):
    # Test that no score has been created, and get_score returns None.
    ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
    score = ai_api.get_score(self.submission_uuid, {})
    self.assertIsNone(score)
def test_submit_database_error_filter(self, mock_filter):
    # ``mock_filter`` is provided by a ``mock.patch`` decorator on this test.
    mock_filter.side_effect = DatabaseError("rumble... ruMBLE, RUMBLE! BOOM!")
    with self.assertRaises(AIGradingInternalError):
        ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
def _ai_assess(sub):
    """
    Helper to fulfill AI assessment requirements.
    """
    # Note that CLASSIFIER_SCORE_OVERRIDES matches OPTIONS_SELECTED_DICT['most'] scores
    train_classifiers(RUBRIC, AIGradingTest.CLASSIFIER_SCORE_OVERRIDES)
    ai_api.on_init(sub, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
    return ai_api.get_latest_assessment(sub)
def test_submit_celery_error(self):
    with mock.patch('openassessment.assessment.api.ai.grading_tasks.grade_essay.apply_async') as mock_grade:
        mock_grade.side_effect = NotConfigured
        with self.assertRaises(AIGradingInternalError):
            ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
def test_submit_no_classifiers(self, mock_call):
    # ``mock_call`` is provided by a ``mock.patch`` decorator on this test.
    mock_call.return_value = []
    with mock.patch('openassessment.assessment.api.ai.logger.info') as mock_log:
        ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
        argument = mock_log.call_args[0][0]
        self.assertTrue(u"no classifiers are available" in argument)
def test_submit_submission_db_error(self):
    with mock.patch('openassessment.assessment.api.ai.AIGradingWorkflow.start_workflow') as mock_start:
        mock_start.side_effect = sub_api.SubmissionInternalError
        with self.assertRaises(AIGradingInternalError):
            ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
def test_get_assessment_scores_by_criteria(self):
    ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)

    # Verify that we got the scores we provided to the stub AI algorithm
    assessment = ai_api.get_latest_assessment(self.submission_uuid)
    assessment_score_dict = ai_api.get_assessment_scores_by_criteria(self.submission_uuid)
    for part in assessment['parts']:
        criterion_name = part['option']['criterion']['name']
        expected_score = self.CLASSIFIER_SCORE_OVERRIDES[criterion_name]['score_override']
        self.assertEqual(assessment_score_dict[criterion_name], expected_score)
def test_automatic_grade_error(self):
    # Create some submissions which will not succeed. No classifiers yet exist.
    for _ in range(0, 10):
        submission = sub_api.create_submission(STUDENT_ITEM, ANSWER)
        ai_api.on_init(submission['uuid'], rubric=RUBRIC, algorithm_id=ALGORITHM_ID)

    # Check that there are unresolved grading workflows
    self._assert_complete(training_done=True, grading_done=False)

    patched_method = 'openassessment.assessment.worker.training.reschedule_grading_tasks.apply_async'
    with mock.patch(patched_method) as mocked_reschedule_grading:
        mocked_reschedule_grading.side_effect = AIGradingInternalError("Kablewey.")
        with self.assertRaises(AIGradingInternalError):
            ai_api.train_classifiers(RUBRIC, EXAMPLES, COURSE_ID, ITEM_ID, ALGORITHM_ID)
def test_automatic_grade(self):
    # Create some submissions which will not succeed. No classifiers yet exist.
    for _ in range(0, 10):
        submission = sub_api.create_submission(STUDENT_ITEM, ANSWER)
        ai_api.on_init(submission['uuid'], rubric=RUBRIC, algorithm_id=ALGORITHM_ID)

    # Check that there are unresolved grading workflows
    self._assert_complete(training_done=True, grading_done=False)

    # Create and train a classifier set. This should set off automatic grading.
    ai_api.train_classifiers(RUBRIC, EXAMPLES, COURSE_ID, ITEM_ID, ALGORITHM_ID)

    # Check to make sure that all work is done.
    self._assert_complete(training_done=True, grading_done=True)
def test_grade_essay(self):
    # Schedule a grading task
    # Because Celery is configured in "always eager" mode, this will
    # be executed synchronously.
    ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)

    # Verify that we got the scores we provided to the stub AI algorithm
    assessment = ai_api.get_latest_assessment(self.submission_uuid)
    for part in assessment['parts']:
        criterion_name = part['option']['criterion']['name']
        expected_score = self.CLASSIFIER_SCORE_OVERRIDES[criterion_name]['score_override']
        self.assertEqual(part['option']['points'], expected_score)

    score = ai_api.get_score(self.submission_uuid, {})
    self.assertEquals(score["points_possible"], 4)
    self.assertEquals(score["points_earned"], 3)
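# For context on the "always eager" note in the test above: with Celery's
# eager mode enabled, ``apply_async`` runs the task synchronously in-process
# instead of enqueueing it, so tests can assert on results immediately.
# A minimal sketch of the kind of test-settings flags involved (an assumption
# for illustration; the exact settings module and Celery version used by this
# project may differ):
#
#     # test settings (Celery 3.x style)
#     CELERY_ALWAYS_EAGER = True
#     CELERY_EAGER_PROPAGATES_EXCEPTIONS = True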
def setUp(self):
    """
    Sets up each test so that it will have unfinished tasks of both types
    """
    # 1) Schedule grading; have the scheduling succeed but the grading fail because no classifiers exist
    for _ in range(0, 10):
        submission = sub_api.create_submission(STUDENT_ITEM, ANSWER)
        self.submission_uuid = submission['uuid']
        ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)

    # 2) Schedule training and have it INTENTIONALLY fail. Now we are at a point where both parts need to be rescheduled
    patched_method = 'openassessment.assessment.api.ai.training_tasks.train_classifiers.apply_async'
    with mock.patch(patched_method) as mock_train_classifiers:
        mock_train_classifiers.side_effect = AITrainingInternalError('Training Classifiers Failed for some Reason.')
        with self.assertRaises(AITrainingInternalError):
            ai_api.train_classifiers(RUBRIC, EXAMPLES, COURSE_ID, ITEM_ID, ALGORITHM_ID)

    self._assert_complete(training_done=False, grading_done=False)
def test_reschedule_all_large(self):
    """
    Specifically tests the querying mechanisms (python generator functions),
    and ensures that our methodology holds up for querysets with 125+ entries
    """
    # Creates 125 more grades (for a total of 135)
    for _ in range(0, 125):
        submission = sub_api.create_submission(STUDENT_ITEM, ANSWER)
        self.submission_uuid = submission['uuid']
        ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)

    # Neither training nor grading should be complete yet.
    self._assert_complete(grading_done=False, training_done=False)

    # Reschedule both
    ai_api.reschedule_unfinished_tasks(course_id=COURSE_ID, item_id=ITEM_ID, task_type=None)

    # Check that both training and grading are now complete
    self._assert_complete(grading_done=True, training_done=True)
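# The docstring above refers to generator-based querying of large querysets.
# As a rough, self-contained sketch (an assumption for illustration, not this
# project's actual helper), batching a large Django queryset with a generator
# so that 125+ rows are never loaded at once might look like this:
def _chunked_queryset_sketch(queryset, chunk_size=50):
    """Yield rows from `queryset` in pk-ordered chunks of `chunk_size`."""
    last_pk = None
    while True:
        batch = queryset.order_by('pk')
        if last_pk is not None:
            # Resume after the last primary key we already yielded
            batch = batch.filter(pk__gt=last_pk)
        batch = list(batch[:chunk_size])
        if not batch:
            return
        for item in batch:
            yield item
        last_pk = batch[-1].pk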
def test_submit_submission_not_found(self):
    with self.assertRaises(AIGradingRequestError):
        ai_api.on_init("no_such_submission", rubric=RUBRIC, algorithm_id=ALGORITHM_ID)
def test_submit_invalid_rubric(self):
    invalid_rubric = {'not_valid': True}
    with self.assertRaises(AIGradingRequestError):
        ai_api.on_init(self.submission_uuid, rubric=invalid_rubric, algorithm_id=ALGORITHM_ID)
def test_submit_database_error_create(self, mock_call):
    # ``mock_call`` is provided by a ``mock.patch`` decorator on this test.
    mock_call.side_effect = DatabaseError("KABOOM!")
    with self.assertRaises(AIGradingInternalError):
        ai_api.on_init(self.submission_uuid, rubric=RUBRIC, algorithm_id=ALGORITHM_ID)