示例#1
0
 def test_is_workflow_complete(self):
     # Check the completion flag reported by the worker API on both sides
     # of marking the stored workflow complete.
     uuid = self.workflow_uuid

     # Before the workflow is marked complete, the API must say "not complete".
     self.assertFalse(ai_worker_api.is_training_workflow_complete(uuid))

     # Mark the workflow complete and persist it ...
     AITrainingWorkflow.objects.get(uuid=uuid).mark_complete_and_save()

     # ... after which the API must report it as complete.
     self.assertTrue(ai_worker_api.is_training_workflow_complete(uuid))
示例#2
0
def train_classifiers(workflow_uuid):
    """
    Asynchronous task to train classifiers for AI grading.

    This task uses the AI API to retrieve task parameters
    (algorithm ID and training examples), trains one classifier
    per criterion, and uploads the trained classifiers.  After a
    successful upload it schedules the grading tasks for the
    same course/item.

    If a step could not be completed successfully, the error is
    logged and the task is retried via `train_classifiers.retry()`.
    If it continues to fail, it is left incomplete.  Since the AI API
    tracks all training tasks in the database, incomplete tasks
    can always be rescheduled manually later.

    Args:
        workflow_uuid (str): The UUID of the workflow associated
            with this training task.

    Returns:
        None

    Raises:
        AIGradingInternalError: Scheduling the grading tasks failed after
            the classifiers were uploaded.  This error is logged and
            re-raised without retrying the training task.

        Any other handled failure (AIError, AIAlgorithmError, InvalidExample,
        or a missing task parameter) is logged, and the result of
        `train_classifiers.retry()` is raised instead of the original error.

    """
    # Short-circuit if the workflow is already marked complete
    # This is an optimization, but training tasks could still
    # execute multiple times depending on when they get picked
    # up by workers and marked complete.
    try:
        if ai_worker_api.is_training_workflow_complete(workflow_uuid):
            return
    except AIError:
        msg = (
            u"An unexpected error occurred while checking the "
            u"completion of training workflow with UUID {uuid}"
        ).format(uuid=workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Retrieve task parameters.
    # A missing key is treated the same way as an API failure.
    try:
        params = ai_worker_api.get_training_task_params(workflow_uuid)
        examples = params['training_examples']
        algorithm_id = params['algorithm_id']
        course_id = params['course_id']
        item_id = params['item_id']
    except (AIError, KeyError):
        msg = (
            u"An error occurred while retrieving AI training "
            u"task parameters for the workflow with UUID {}"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Retrieve the ML algorithm to use for training
    # (based on task params and worker configuration)
    try:
        algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)
    except AIAlgorithmError:
        msg = (
            u"An error occurred while loading the "
            u"AI algorithm (training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()
    except AIError:
        msg = (
            u"An error occurred while retrieving "
            u"the algorithm ID (training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Train a classifier for each criterion
    # The AIAlgorithm subclass is responsible for ensuring that
    # the trained classifiers are JSON-serializable.
    try:
        # `.items()` is used instead of the Python 2-only `.iteritems()`
        # so the iteration works on both Python 2 and Python 3
        # (assumes `_examples_by_criterion` returns a dict-like mapping).
        examples_by_criterion = _examples_by_criterion(examples)
        classifier_set = {
            criterion_name: algorithm.train_classifier(examples_dict)
            for criterion_name, examples_dict
            in examples_by_criterion.items()
        }
    except InvalidExample:
        msg = (
            u"Training example format was not valid "
            u"(training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()
    except AIAlgorithmError:
        msg = (
            u"An error occurred while training AI classifiers "
            u"(training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Upload the classifiers
    # (implicitly marks the workflow complete)
    try:
        ai_worker_api.create_classifiers(workflow_uuid, classifier_set)
    except AIError:
        msg = (
            u"An error occurred while uploading trained classifiers "
            u"(training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Upon successful completion of the creation of classifiers, we will try to automatically schedule any
    # grading tasks for the same item.
    try:
        reschedule_grading_tasks.apply_async(args=[course_id, item_id])
    except AIGradingInternalError as ex:
        # The two literals are joined by implicit concatenation, so the
        # first one must end with a space to keep the words separated.
        msg = (
            u"An error occurred while trying to regrade all ungraded assignments "
            u"after classifiers were trained successfully: {}"
        ).format(ex)
        logger.exception(msg)
        # Here we don't retry, because they will already retry once in the grading task.
        raise
示例#3
0
def train_classifiers(workflow_uuid):
    """
    Asynchronous task to train classifiers for AI grading.

    This task uses the AI API to retrieve task parameters
    (algorithm ID and training examples), trains one classifier
    per criterion, and uploads the trained classifiers.  After a
    successful upload it schedules the grading tasks for the
    same course/item.

    If a step could not be completed successfully, the error is
    logged and the task is retried via `train_classifiers.retry()`.
    If it continues to fail, it is left incomplete.  Since the AI API
    tracks all training tasks in the database, incomplete tasks
    can always be rescheduled manually later.

    Args:
        workflow_uuid (str): The UUID of the workflow associated
            with this training task.

    Returns:
        None

    Raises:
        AIGradingInternalError: Scheduling the grading tasks failed after
            the classifiers were uploaded.  This error is logged and
            re-raised without retrying the training task.

        Any other handled failure (AIError, AIAlgorithmError, InvalidExample,
        or a missing task parameter) is logged, and the result of
        `train_classifiers.retry()` is raised instead of the original error.

    """
    # Short-circuit if the workflow is already marked complete
    # This is an optimization, but training tasks could still
    # execute multiple times depending on when they get picked
    # up by workers and marked complete.
    try:
        if ai_worker_api.is_training_workflow_complete(workflow_uuid):
            return
    except AIError:
        msg = (u"An unexpected error occurred while checking the "
               u"completion of training workflow with UUID {uuid}").format(
                   uuid=workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Retrieve task parameters.
    # A missing key is treated the same way as an API failure.
    try:
        params = ai_worker_api.get_training_task_params(workflow_uuid)
        examples = params['training_examples']
        algorithm_id = params['algorithm_id']
        course_id = params['course_id']
        item_id = params['item_id']
    except (AIError, KeyError):
        msg = (u"An error occurred while retrieving AI training "
               u"task parameters for the workflow with UUID {}"
               ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Retrieve the ML algorithm to use for training
    # (based on task params and worker configuration)
    try:
        algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)
    except AIAlgorithmError:
        msg = (
            u"An error occurred while loading the "
            u"AI algorithm (training workflow UUID {})").format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()
    except AIError:
        msg = (u"An error occurred while retrieving "
               u"the algorithm ID (training workflow UUID {})"
               ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Train a classifier for each criterion
    # The AIAlgorithm subclass is responsible for ensuring that
    # the trained classifiers are JSON-serializable.
    try:
        # `.items()` is used instead of the Python 2-only `.iteritems()`
        # so the iteration works on both Python 2 and Python 3
        # (assumes `_examples_by_criterion` returns a dict-like mapping).
        examples_by_criterion = _examples_by_criterion(examples)
        classifier_set = {
            criterion_name: algorithm.train_classifier(examples_dict)
            for criterion_name, examples_dict in examples_by_criterion.items()
        }
    except InvalidExample:
        msg = (u"Training example format was not valid "
               u"(training workflow UUID {})").format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()
    except AIAlgorithmError:
        msg = (u"An error occurred while training AI classifiers "
               u"(training workflow UUID {})").format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Upload the classifiers
    # (implicitly marks the workflow complete)
    try:
        ai_worker_api.create_classifiers(workflow_uuid, classifier_set)
    except AIError:
        msg = (u"An error occurred while uploading trained classifiers "
               u"(training workflow UUID {})").format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Upon successful completion of the creation of classifiers, we will try to automatically schedule any
    # grading tasks for the same item.
    try:
        reschedule_grading_tasks.apply_async(args=[course_id, item_id])
    except AIGradingInternalError as ex:
        # The two literals are joined by implicit concatenation, so the
        # first one must end with a space to keep the words separated.
        msg = (
            u"An error occurred while trying to regrade all ungraded assignments "
            u"after classifiers were trained successfully: {}").format(ex)
        logger.exception(msg)
        # Here we don't retry, because they will already retry once in the grading task.
        raise
示例#4
0
 def test_is_workflow_complete_database_error(self, mock_call):
     # `mock_call` is presumably injected by a patch decorator that is not
     # visible in this excerpt; it is made to fail with a DatabaseError.
     mock_call.side_effect = DatabaseError("Oh no!")

     # The API is expected to surface this as AITrainingInternalError.
     self.assertRaises(
         AITrainingInternalError,
         ai_worker_api.is_training_workflow_complete,
         self.workflow_uuid,
     )
示例#5
0
 def test_is_workflow_complete_no_such_workflow(self):
     # Asking about an identifier that matches no workflow is expected to
     # raise AITrainingRequestError.
     self.assertRaises(
         AITrainingRequestError,
         ai_worker_api.is_training_workflow_complete,
         'no such workflow',
     )
示例#6
0
 def test_is_workflow_complete(self):
     # The completion flag reported by the worker API must flip from False
     # to True once the stored workflow has been marked complete.
     is_complete = ai_worker_api.is_training_workflow_complete
     workflow_uuid = self.workflow_uuid

     self.assertFalse(is_complete(workflow_uuid))

     training_workflow = AITrainingWorkflow.objects.get(uuid=workflow_uuid)
     training_workflow.mark_complete_and_save()

     self.assertTrue(is_complete(workflow_uuid))
示例#7
0
 def test_is_workflow_complete_database_error(self, mock_call):
     # Make the mocked call fail with a database error
     # (the mock is presumably supplied by a patch decorator outside this excerpt).
     db_failure = DatabaseError("Oh no!")
     mock_call.side_effect = db_failure

     # The completion check should report the failure as AITrainingInternalError.
     workflow_uuid = self.workflow_uuid
     with self.assertRaises(AITrainingInternalError):
         ai_worker_api.is_training_workflow_complete(workflow_uuid)
示例#8
0
 def test_is_workflow_complete_no_such_workflow(self):
     # An identifier that matches no workflow should be rejected
     # with AITrainingRequestError.
     bogus_uuid = 'no such workflow'
     with self.assertRaises(AITrainingRequestError):
         ai_worker_api.is_training_workflow_complete(bogus_uuid)