Пример #1
0
    def test_create_classifiers_no_training_examples(self):
        # Start from a brand-new workflow created without training examples
        empty_workflow = AITrainingWorkflow.objects.create(
            algorithm_id=ALGORITHM_ID
        )

        # Creating classifiers for it is expected to raise an internal error
        self.assertRaises(
            AITrainingInternalError,
            ai_worker_api.create_classifiers,
            empty_workflow.uuid,
            CLASSIFIERS
        )
Пример #2
0
    def test_create_classifiers_no_training_examples(self):
        # This workflow is created without any training examples
        bare_workflow = AITrainingWorkflow.objects.create(algorithm_id=ALGORITHM_ID)
        bare_uuid = bare_workflow.uuid

        # The worker API is expected to fail when asked to create
        # classifiers for such a workflow
        with self.assertRaises(AITrainingInternalError):
            ai_worker_api.create_classifiers(bare_uuid, CLASSIFIERS)
Пример #3
0
    def test_create_classifiers_serialize_error(self):
        # Work on a deep copy of the classifier data, then plant a
        # datetime in it so the data is NOT JSON-serializable
        unserializable = copy.deepcopy(CLASSIFIERS)
        unserializable[u"vøȼȺƀᵾłȺɍɏ"] = datetime.datetime.now()

        # Creating classifiers from this data is expected to fail
        self.assertRaises(
            AITrainingInternalError,
            ai_worker_api.create_classifiers,
            self.workflow_uuid,
            unserializable
        )
Пример #4
0
    def test_create_classifiers_missing_criteria(self):
        # Drop one criterion from a deep copy of the classifiers dict
        incomplete = copy.deepcopy(CLASSIFIERS)
        incomplete.pop(u"vøȼȺƀᵾłȺɍɏ")

        # The incomplete set is expected to be rejected as a bad request
        self.assertRaises(
            AITrainingRequestError,
            ai_worker_api.create_classifiers,
            self.workflow_uuid,
            incomplete
        )
Пример #5
0
    def test_create_classifiers_missing_criteria(self):
        # Build a classifiers dict that lacks one of the criteria
        missing_one = copy.deepcopy(CLASSIFIERS)
        removed_key = u"vøȼȺƀᵾłȺɍɏ"
        del missing_one[removed_key]

        # Submitting it should raise a request error
        target_uuid = self.workflow_uuid
        with self.assertRaises(AITrainingRequestError):
            ai_worker_api.create_classifiers(target_uuid, missing_one)
Пример #6
0
    def test_create_classifiers_serialize_error(self):
        # Replace one criterion's data with a datetime, a value that
        # is NOT JSON-serializable
        broken = copy.deepcopy(CLASSIFIERS)
        poisoned_key = u"vøȼȺƀᵾłȺɍɏ"
        broken[poisoned_key] = datetime.datetime.now()

        # An internal error is expected when the classifiers are created
        target_uuid = self.workflow_uuid
        with self.assertRaises(AITrainingInternalError):
            ai_worker_api.create_classifiers(target_uuid, broken)
Пример #7
0
    def test_create_classifiers_unrecognized_criterion(self):
        # Copy the classifiers dict and register one additional criterion,
        # reusing a copy of an existing criterion's data as its value
        with_extra = copy.deepcopy(CLASSIFIERS)
        duplicated_data = copy.deepcopy(with_extra[u"vøȼȺƀᵾłȺɍɏ"])
        with_extra[u"extra_criterion"] = duplicated_data

        # The extra criterion is expected to be rejected as a bad request
        self.assertRaises(
            AITrainingRequestError,
            ai_worker_api.create_classifiers,
            self.workflow_uuid,
            with_extra
        )
Пример #8
0
    def test_create_classifiers_unrecognized_criterion(self):
        # Extend a copy of the classifiers dict with one extra criterion
        padded = copy.deepcopy(CLASSIFIERS)
        template = padded[u"vøȼȺƀᵾłȺɍɏ"]
        padded[u"extra_criterion"] = copy.deepcopy(template)

        # A request error is expected when the classifiers are created
        target_uuid = self.workflow_uuid
        with self.assertRaises(AITrainingRequestError):
            ai_worker_api.create_classifiers(target_uuid, padded)
Пример #9
0
    def test_create_classifiers(self):
        ai_worker_api.create_classifiers(self.workflow_uuid, CLASSIFIERS)

        # Reload the workflow; its completion timestamp should now be set
        trained = AITrainingWorkflow.objects.get(uuid=self.workflow_uuid)
        self.assertIsNot(trained.completed_at, None)

        # A classifier set should be attached and match what was uploaded.
        # NOTE(review): if both arguments are dicts, assertItemsEqual
        # iterates them and therefore compares only the criterion keys,
        # not the classifier data stored under each key.
        classifier_set = trained.classifier_set
        self.assertIsNot(classifier_set, None)
        self.assertItemsEqual(
            CLASSIFIERS, classifier_set.classifier_data_by_criterion
        )
Пример #10
0
    def test_create_classifiers(self):
        target_uuid = self.workflow_uuid
        ai_worker_api.create_classifiers(target_uuid, CLASSIFIERS)

        # The workflow is expected to be marked complete afterwards
        reloaded = AITrainingWorkflow.objects.get(uuid=target_uuid)
        self.assertIsNot(reloaded.completed_at, None)

        # The workflow is expected to reference a classifier set whose
        # per-criterion data corresponds to the uploaded classifiers.
        # NOTE(review): if both arguments are dicts, assertItemsEqual
        # compares their keys only; the values are not checked here.
        stored_set = reloaded.classifier_set
        self.assertIsNot(stored_set, None)
        stored_data = stored_set.classifier_data_by_criterion
        self.assertItemsEqual(CLASSIFIERS, stored_data)
Пример #11
0
    def test_create_classifiers_twice(self):
        # Run the same upload two times in a row to mimic a task that is
        # executed repeatedly for one workflow.  The calls are sequential,
        # so the second one is expected to have no effect.
        for _ in range(2):
            ai_worker_api.create_classifiers(self.workflow_uuid, CLASSIFIERS)

        # Reload the workflow; its completion timestamp should be set
        trained = AITrainingWorkflow.objects.get(uuid=self.workflow_uuid)
        self.assertIsNot(trained.completed_at, None)

        # A classifier set should be attached and match what was uploaded.
        # NOTE(review): if both arguments are dicts, assertItemsEqual
        # iterates them and therefore compares only the criterion keys,
        # not the classifier data stored under each key.
        classifier_set = trained.classifier_set
        self.assertIsNot(classifier_set, None)
        self.assertItemsEqual(
            CLASSIFIERS, classifier_set.classifier_data_by_criterion
        )
Пример #12
0
    def test_create_classifiers_twice(self):
        # Mimic the training task being executed more than once for the
        # same workflow.  Because the two calls happen one after the
        # other, the second is expected to change nothing.
        target_uuid = self.workflow_uuid
        ai_worker_api.create_classifiers(target_uuid, CLASSIFIERS)
        ai_worker_api.create_classifiers(target_uuid, CLASSIFIERS)

        # The workflow is expected to be marked complete afterwards
        reloaded = AITrainingWorkflow.objects.get(uuid=target_uuid)
        self.assertIsNot(reloaded.completed_at, None)

        # The workflow is expected to reference a classifier set whose
        # per-criterion data corresponds to the uploaded classifiers.
        # NOTE(review): if both arguments are dicts, assertItemsEqual
        # compares their keys only; the values are not checked here.
        stored_set = reloaded.classifier_set
        self.assertIsNot(stored_set, None)
        stored_data = stored_set.classifier_data_by_criterion
        self.assertItemsEqual(CLASSIFIERS, stored_data)
Пример #13
0
 def test_create_classifiers_upload_error(self, mock_data):
     # Make the mocked save() raise an I/O error, standing in for a
     # failed upload of the trained classifier
     mock_data.save.side_effect = IOError("OH NO!!!")

     # The failure is expected to surface as an internal training error
     self.assertRaises(
         AITrainingInternalError,
         ai_worker_api.create_classifiers,
         self.workflow_uuid,
         CLASSIFIERS
     )
Пример #14
0
 def test_create_classifiers_database_error(self, mock_get):
     # Make the mocked call raise a database error
     mock_get.side_effect = DatabaseError("KABOOM!")

     # The failure is expected to surface as an internal training error
     self.assertRaises(
         AITrainingInternalError,
         ai_worker_api.create_classifiers,
         self.workflow_uuid,
         CLASSIFIERS
     )
Пример #15
0
 def test_create_classifiers_upload_error(self, mock_data):
     # Pretend that uploading the trained classifier fails: the mocked
     # save() raises an IOError when it is called
     upload_failure = IOError("OH NO!!!")
     mock_data.save.side_effect = upload_failure

     target_uuid = self.workflow_uuid
     with self.assertRaises(AITrainingInternalError):
         ai_worker_api.create_classifiers(target_uuid, CLASSIFIERS)
Пример #16
0
 def test_create_classifiers_database_error(self, mock_get):
     # The mocked call raises a DatabaseError whenever it is invoked
     db_failure = DatabaseError("KABOOM!")
     mock_get.side_effect = db_failure

     target_uuid = self.workflow_uuid
     with self.assertRaises(AITrainingInternalError):
         ai_worker_api.create_classifiers(target_uuid, CLASSIFIERS)
Пример #17
0
 def test_create_classifiers_no_workflow(self):
     # The UUID passed here is expected not to match any workflow, so
     # the call should be rejected with a request error
     self.assertRaises(
         AITrainingRequestError,
         ai_worker_api.create_classifiers,
         "invalid_uuid",
         CLASSIFIERS
     )
Пример #18
0
def train_classifiers(workflow_uuid):
    """
    Asynchronous task to train classifiers for AI grading.
    This task uses the AI API to retrieve task parameters
    (algorithm ID and training examples) and upload
    the trained classifiers.

    If the task could not be completed successfully,
    it is retried a few times.  If it continues to fail,
    it is left incomplete.  Since the AI API tracks all
    training tasks in the database, incomplete tasks
    can always be rescheduled manually later.

    Once the classifiers have been uploaded, grading tasks for the
    same course and item are rescheduled asynchronously.

    Args:
        workflow_uuid (str): The UUID of the workflow associated
            with this training task.

    Returns:
        None

    Raises:
        AIError: An error occurred during a request to the AI API.
        AIAlgorithmError: An error occurred while training the AI classifiers.
        InvalidExample: The training examples provided by the AI API were not valid.
        AIGradingInternalError: Rescheduling the grading tasks failed;
            this one is logged and re-raised without a retry.

    The first three errors are caught here, logged, and converted into
    `raise train_classifiers.retry()`.
    NOTE(review): whether they ultimately propagate to the caller depends
    on the task's retry policy, which is configured outside this function.

    """
    # Short-circuit if the workflow is already marked complete
    # This is an optimization, but training tasks could still
    # execute multiple times depending on when they get picked
    # up by workers and marked complete.
    try:
        if ai_worker_api.is_training_workflow_complete(workflow_uuid):
            return
    except AIError:
        msg = (u"An unexpected error occurred while checking the "
               u"completion of training workflow with UUID {uuid}").format(
                   uuid=workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Retrieve task parameters
    # A missing key is treated the same way as an API failure.
    try:
        params = ai_worker_api.get_training_task_params(workflow_uuid)
        examples = params['training_examples']
        algorithm_id = params['algorithm_id']
        course_id = params['course_id']
        item_id = params['item_id']
    except (AIError, KeyError):
        msg = (u"An error occurred while retrieving AI training "
               u"task parameters for the workflow with UUID {}"
               ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Retrieve the ML algorithm to use for training
    # (based on task params and worker configuration)
    try:
        algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)
    except AIAlgorithmError:
        msg = (
            u"An error occurred while loading the "
            u"AI algorithm (training workflow UUID {})").format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()
    except AIError:
        msg = (u"An error occurred while retrieving "
               u"the algorithm ID (training workflow UUID {})"
               ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Train a classifier for each criterion
    # The AIAlgorithm subclass is responsible for ensuring that
    # the trained classifiers are JSON-serializable.
    try:
        classifier_set = {
            criterion_name: algorithm.train_classifier(examples_dict)
            for criterion_name, examples_dict in _examples_by_criterion(
                examples).iteritems()
        }
    except InvalidExample:
        msg = (u"Training example format was not valid "
               u"(training workflow UUID {})").format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()
    except AIAlgorithmError:
        msg = (u"An error occurred while training AI classifiers "
               u"(training workflow UUID {})").format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Upload the classifiers
    # (implicitly marks the workflow complete)
    try:
        ai_worker_api.create_classifiers(workflow_uuid, classifier_set)
    except AIError:
        msg = (u"An error occurred while uploading trained classifiers "
               u"(training workflow UUID {})").format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Upon successful completion of the creation of classifiers, we will try to automatically schedule any
    # grading tasks for the same item.
    try:
        reschedule_grading_tasks.apply_async(args=[course_id, item_id])
    except AIGradingInternalError as ex:
        # The first literal ends with a space: adjacent string literals
        # are joined with no separator, so without it the log message
        # would read "...assignmentsafter classifiers...".
        msg = (
            u"An error occured while trying to regrade all ungraded assignments "
            u"after classifiers were trained successfully: {}").format(ex)
        logger.exception(msg)
        # Here we don't retry, because they will already retry once in the grading task.
        raise
Пример #19
0
def train_classifiers(workflow_uuid):
    """
    Asynchronous task to train classifiers for AI grading.
    This task uses the AI API to retrieve task parameters
    (algorithm ID and training examples) and upload
    the trained classifiers.

    If the task could not be completed successfully,
    it is retried a few times.  If it continues to fail,
    it is left incomplete.  Since the AI API tracks all
    training tasks in the database, incomplete tasks
    can always be rescheduled manually later.

    Once the classifiers have been uploaded, grading tasks for the
    same course and item are rescheduled asynchronously.

    Args:
        workflow_uuid (str): The UUID of the workflow associated
            with this training task.

    Returns:
        None

    Raises:
        AIError: An error occurred during a request to the AI API.
        AIAlgorithmError: An error occurred while training the AI classifiers.
        InvalidExample: The training examples provided by the AI API were not valid.
        AIGradingInternalError: Rescheduling the grading tasks failed;
            this one is logged and re-raised without a retry.

    The first three errors are caught here, logged, and converted into
    `raise train_classifiers.retry()`.
    NOTE(review): whether they ultimately propagate to the caller depends
    on the task's retry policy, which is configured outside this function.

    """
    # Short-circuit if the workflow is already marked complete
    # This is an optimization, but training tasks could still
    # execute multiple times depending on when they get picked
    # up by workers and marked complete.
    try:
        if ai_worker_api.is_training_workflow_complete(workflow_uuid):
            return
    except AIError:
        msg = (
            u"An unexpected error occurred while checking the "
            u"completion of training workflow with UUID {uuid}"
        ).format(uuid=workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Retrieve task parameters
    # A missing key is treated the same way as an API failure.
    try:
        params = ai_worker_api.get_training_task_params(workflow_uuid)
        examples = params['training_examples']
        algorithm_id = params['algorithm_id']
        course_id = params['course_id']
        item_id = params['item_id']
    except (AIError, KeyError):
        msg = (
            u"An error occurred while retrieving AI training "
            u"task parameters for the workflow with UUID {}"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Retrieve the ML algorithm to use for training
    # (based on task params and worker configuration)
    try:
        algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)
    except AIAlgorithmError:
        msg = (
            u"An error occurred while loading the "
            u"AI algorithm (training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()
    except AIError:
        msg = (
            u"An error occurred while retrieving "
            u"the algorithm ID (training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Train a classifier for each criterion
    # The AIAlgorithm subclass is responsible for ensuring that
    # the trained classifiers are JSON-serializable.
    try:
        classifier_set = {
            criterion_name: algorithm.train_classifier(examples_dict)
            for criterion_name, examples_dict
            in _examples_by_criterion(examples).iteritems()
        }
    except InvalidExample:
        msg = (
            u"Training example format was not valid "
            u"(training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()
    except AIAlgorithmError:
        msg = (
            u"An error occurred while training AI classifiers "
            u"(training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Upload the classifiers
    # (implicitly marks the workflow complete)
    try:
        ai_worker_api.create_classifiers(workflow_uuid, classifier_set)
    except AIError:
        msg = (
            u"An error occurred while uploading trained classifiers "
            u"(training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Upon successful completion of the creation of classifiers, we will try to automatically schedule any
    # grading tasks for the same item.
    try:
        reschedule_grading_tasks.apply_async(args=[course_id, item_id])
    except AIGradingInternalError as ex:
        # The first literal ends with a space: adjacent string literals
        # are joined with no separator, so without it the log message
        # would read "...assignmentsafter classifiers...".
        msg = (
            u"An error occured while trying to regrade all ungraded assignments "
            u"after classifiers were trained successfully: {}"
        ).format(ex)
        logger.exception(msg)
        # Here we don't retry, because they will already retry once in the grading task.
        raise
Пример #20
0
 def test_create_classifiers_no_workflow(self):
     # Ask for classifiers on a UUID that is expected not to belong to
     # any workflow; the worker API should answer with a request error
     unknown_uuid = "invalid_uuid"
     with self.assertRaises(AITrainingRequestError):
         ai_worker_api.create_classifiers(unknown_uuid, CLASSIFIERS)