Exemplo n.º 1
0
 def test_get_training_task_params(self):
     # Ask the worker API for the parameters of this test's training workflow.
     params = ai_worker_api.get_training_task_params(self.workflow_uuid)

     # One expected entry per EXAMPLES item: the answer text plus a
     # criterion-name -> score mapping.
     expected_examples = [
         {
             'text': EXAMPLES[0]['answer'],
             'scores': {
                 u"vøȼȺƀᵾłȺɍɏ": 1,
                 u"ﻭɼค๓๓คɼ": 0
             }
         },
         {
             'text': EXAMPLES[1]['answer'],
             'scores': {
                 u"vøȼȺƀᵾłȺɍɏ": 0,
                 u"ﻭɼค๓๓คɼ": 2
             }
         },
     ]

     # The examples are a collection, so compare them without regard to order.
     self.assertItemsEqual(params['training_examples'], expected_examples)

     # The algorithm ID is a single value: it must match exactly.
     # assertItemsEqual would only compare its elements as an unordered
     # collection (e.g. any anagram of a string ID would pass).
     self.assertEqual(params['algorithm_id'], ALGORITHM_ID)
Exemplo n.º 2
0
 def test_get_training_task_params(self):
     # Ask the worker API for the parameters of this test's training workflow.
     params = ai_worker_api.get_training_task_params(self.workflow_uuid)

     # One expected entry per EXAMPLES item: the answer text plus a
     # criterion-name -> score mapping.
     expected_examples = [
         {
             'text': EXAMPLES[0]['answer'],
             'scores': {
                 u"vøȼȺƀᵾłȺɍɏ": 1,
                 u"ﻭɼค๓๓คɼ": 0
             }
         },
         {
             'text': EXAMPLES[1]['answer'],
             'scores': {
                 u"vøȼȺƀᵾłȺɍɏ": 0,
                 u"ﻭɼค๓๓คɼ": 2
             }
         },
     ]

     # The examples are a collection, so compare them without regard to order.
     self.assertItemsEqual(params['training_examples'], expected_examples)

     # The algorithm ID is a single value: it must match exactly.
     # assertItemsEqual would only compare its elements as an unordered
     # collection (e.g. any anagram of a string ID would pass).
     self.assertEqual(params['algorithm_id'], ALGORITHM_ID)
Exemplo n.º 3
0
    def _assert_mutated_examples(self, mutate_func):
        """
        Run the training task against tampered examples and expect a retry.

        The real task parameters are fetched from the API, ``mutate_func``
        is applied to their ``'training_examples'`` list (its return value
        is ignored), and the worker's parameter lookup is patched to hand
        back the altered parameters.  The task is then executed inside
        ``self.assert_retry(train_classifiers, InvalidExample)``.

        Args:
            mutate_func (callable): Function that accepts a single argument,
                the list of example dictionaries.

        Raises:
            AssertionError

        """
        task_params = ai_worker_api.get_training_task_params(self.workflow_uuid)
        mutate_func(task_params['training_examples'])

        # Patch the name the worker module resolves, so the task under test
        # receives the altered parameters instead of querying the API.
        patch_target = 'openassessment.assessment.worker.training.ai_worker_api.get_training_task_params'
        api_patch = mock.patch(patch_target, return_value=task_params)
        with api_patch, self.assert_retry(train_classifiers, InvalidExample):
            train_classifiers(self.workflow_uuid)
Exemplo n.º 4
0
    def _assert_mutated_examples(self, mutate_func):
        """
        Run the training task against tampered examples and expect a retry.

        The real task parameters are fetched from the API, ``mutate_func``
        is applied to their ``'training_examples'`` list (its return value
        is ignored), and the worker's parameter lookup is patched to hand
        back the altered parameters.  The task is then executed inside
        ``self.assert_retry(train_classifiers, InvalidExample)``.

        Args:
            mutate_func (callable): Function that accepts a single argument,
                the list of example dictionaries.

        Raises:
            AssertionError

        """
        task_params = ai_worker_api.get_training_task_params(self.workflow_uuid)
        mutate_func(task_params['training_examples'])

        # Patch the name the worker module resolves, so the task under test
        # receives the altered parameters instead of querying the API.
        patch_target = 'openassessment.assessment.worker.training.ai_worker_api.get_training_task_params'
        api_patch = mock.patch(patch_target, return_value=task_params)
        with api_patch, self.assert_retry(train_classifiers, InvalidExample):
            train_classifiers(self.workflow_uuid)
Exemplo n.º 5
0
def train_classifiers(workflow_uuid):
    """
    Asynchronous task to train classifiers for AI grading.
    This task uses the AI API to retrieve task parameters
    (algorithm ID and training examples) and upload
    the trained classifiers.

    If the task could not be completed successfully,
    it is retried a few times.  If it continues to fail,
    it is left incomplete.  Since the AI API tracks all
    training tasks in the database, incomplete tasks
    can always be rescheduled manually later.

    Every failure before the final scheduling step is logged and then
    handed to ``train_classifiers.retry()``.  A failure while scheduling
    the follow-up grading tasks is logged and re-raised without a retry.

    Args:
        workflow_uuid (str): The UUID of the workflow associated
            with this training task.

    Returns:
        None

    Raises:
        AIError: An error occurred during a request to the AI API.
        AIAlgorithmError: An error occurred while training the AI classifiers.
        InvalidExample: The training examples provided by the AI API were not valid.
        AIGradingInternalError: Scheduling the grading tasks for the
            same item failed after the classifiers were created.

    """
    # Short-circuit if the workflow is already marked complete
    # This is an optimization, but training tasks could still
    # execute multiple times depending on when they get picked
    # up by workers and marked complete.
    try:
        if ai_worker_api.is_training_workflow_complete(workflow_uuid):
            return
    except AIError:
        msg = (
            u"An unexpected error occurred while checking the "
            u"completion of training workflow with UUID {uuid}"
        ).format(uuid=workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Retrieve task parameters
    # A missing key is treated like an API failure: log and retry.
    try:
        params = ai_worker_api.get_training_task_params(workflow_uuid)
        examples = params['training_examples']
        algorithm_id = params['algorithm_id']
        course_id = params['course_id']
        item_id = params['item_id']
    except (AIError, KeyError):
        msg = (
            u"An error occurred while retrieving AI training "
            u"task parameters for the workflow with UUID {}"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Retrieve the ML algorithm to use for training
    # (based on task params and worker configuration)
    try:
        algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)
    except AIAlgorithmError:
        msg = (
            u"An error occurred while loading the "
            u"AI algorithm (training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()
    except AIError:
        msg = (
            u"An error occurred while retrieving "
            u"the algorithm ID (training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Train a classifier for each criterion
    # The AIAlgorithm subclass is responsible for ensuring that
    # the trained classifiers are JSON-serializable.
    try:
        # .items() (rather than the Python-2-only .iteritems()) keeps this
        # loop working on both Python 2 and Python 3 dictionaries.
        classifier_set = {
            criterion_name: algorithm.train_classifier(examples_dict)
            for criterion_name, examples_dict
            in _examples_by_criterion(examples).items()
        }
    except InvalidExample:
        msg = (
            u"Training example format was not valid "
            u"(training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()
    except AIAlgorithmError:
        msg = (
            u"An error occurred while training AI classifiers "
            u"(training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Upload the classifiers
    # (implicitly marks the workflow complete)
    try:
        ai_worker_api.create_classifiers(workflow_uuid, classifier_set)
    except AIError:
        msg = (
            u"An error occurred while uploading trained classifiers "
            u"(training workflow UUID {})"
        ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Upon successful completion of the creation of classifiers, we will try to automatically schedule any
    # grading tasks for the same item.
    try:
        reschedule_grading_tasks.apply_async(args=[course_id, item_id])
    except AIGradingInternalError as ex:
        # The first fragment ends with a space so the two literals do not
        # run together as "assignmentsafter" in the logged message.
        msg = (
            u"An error occurred while trying to regrade all ungraded assignments "
            u"after classifiers were trained successfully: {}"
        ).format(ex)
        logger.exception(msg)
        # Here we don't retry, because they will already retry once in the grading task.
        raise
Exemplo n.º 6
0
def train_classifiers(workflow_uuid):
    """
    Asynchronous task to train classifiers for AI grading.
    This task uses the AI API to retrieve task parameters
    (algorithm ID and training examples) and upload
    the trained classifiers.

    If the task could not be completed successfully,
    it is retried a few times.  If it continues to fail,
    it is left incomplete.  Since the AI API tracks all
    training tasks in the database, incomplete tasks
    can always be rescheduled manually later.

    Every failure before the final scheduling step is logged and then
    handed to ``train_classifiers.retry()``.  A failure while scheduling
    the follow-up grading tasks is logged and re-raised without a retry.

    Args:
        workflow_uuid (str): The UUID of the workflow associated
            with this training task.

    Returns:
        None

    Raises:
        AIError: An error occurred during a request to the AI API.
        AIAlgorithmError: An error occurred while training the AI classifiers.
        InvalidExample: The training examples provided by the AI API were not valid.
        AIGradingInternalError: Scheduling the grading tasks for the
            same item failed after the classifiers were created.

    """
    # Short-circuit if the workflow is already marked complete
    # This is an optimization, but training tasks could still
    # execute multiple times depending on when they get picked
    # up by workers and marked complete.
    try:
        if ai_worker_api.is_training_workflow_complete(workflow_uuid):
            return
    except AIError:
        msg = (u"An unexpected error occurred while checking the "
               u"completion of training workflow with UUID {uuid}").format(
                   uuid=workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Retrieve task parameters
    # A missing key is treated like an API failure: log and retry.
    try:
        params = ai_worker_api.get_training_task_params(workflow_uuid)
        examples = params['training_examples']
        algorithm_id = params['algorithm_id']
        course_id = params['course_id']
        item_id = params['item_id']
    except (AIError, KeyError):
        msg = (u"An error occurred while retrieving AI training "
               u"task parameters for the workflow with UUID {}"
               ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Retrieve the ML algorithm to use for training
    # (based on task params and worker configuration)
    try:
        algorithm = AIAlgorithm.algorithm_for_id(algorithm_id)
    except AIAlgorithmError:
        msg = (
            u"An error occurred while loading the "
            u"AI algorithm (training workflow UUID {})").format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()
    except AIError:
        msg = (u"An error occurred while retrieving "
               u"the algorithm ID (training workflow UUID {})"
               ).format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Train a classifier for each criterion
    # The AIAlgorithm subclass is responsible for ensuring that
    # the trained classifiers are JSON-serializable.
    try:
        # .items() (rather than the Python-2-only .iteritems()) keeps this
        # loop working on both Python 2 and Python 3 dictionaries.
        classifier_set = {
            criterion_name: algorithm.train_classifier(examples_dict)
            for criterion_name, examples_dict in _examples_by_criterion(
                examples).items()
        }
    except InvalidExample:
        msg = (u"Training example format was not valid "
               u"(training workflow UUID {})").format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()
    except AIAlgorithmError:
        msg = (u"An error occurred while training AI classifiers "
               u"(training workflow UUID {})").format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Upload the classifiers
    # (implicitly marks the workflow complete)
    try:
        ai_worker_api.create_classifiers(workflow_uuid, classifier_set)
    except AIError:
        msg = (u"An error occurred while uploading trained classifiers "
               u"(training workflow UUID {})").format(workflow_uuid)
        logger.exception(msg)
        raise train_classifiers.retry()

    # Upon successful completion of the creation of classifiers, we will try to automatically schedule any
    # grading tasks for the same item.
    try:
        reschedule_grading_tasks.apply_async(args=[course_id, item_id])
    except AIGradingInternalError as ex:
        # The first fragment ends with a space so the two literals do not
        # run together as "assignmentsafter" in the logged message.
        msg = (
            u"An error occurred while trying to regrade all ungraded assignments "
            u"after classifiers were trained successfully: {}").format(ex)
        logger.exception(msg)
        # Here we don't retry, because they will already retry once in the grading task.
        raise
Exemplo n.º 7
0
 def test_get_training_task_params_database_error(self, mock_get):
     # mock_get is presumably injected by a mock.patch decorator that is not
     # visible here; make it fail with a database error when called.
     mock_get.side_effect = DatabaseError("KABOOM!")

     # The API call is expected to raise AITrainingInternalError.
     self.assertRaises(
         AITrainingInternalError,
         ai_worker_api.get_training_task_params,
         self.workflow_uuid
     )
Exemplo n.º 8
0
 def test_get_training_task_params_no_workflow(self):
     # Requesting parameters for the UUID "invalid_uuid" (presumably one that
     # matches no workflow) is expected to raise AITrainingRequestError.
     self.assertRaises(
         AITrainingRequestError,
         ai_worker_api.get_training_task_params,
         "invalid_uuid"
     )
Exemplo n.º 9
0
 def test_get_training_task_params_database_error(self, mock_get):
     # mock_get is presumably injected by a mock.patch decorator that is not
     # visible here; make it fail with a database error when called.
     mock_get.side_effect = DatabaseError("KABOOM!")

     # The API call is expected to raise AITrainingInternalError.
     self.assertRaises(
         AITrainingInternalError,
         ai_worker_api.get_training_task_params,
         self.workflow_uuid
     )
Exemplo n.º 10
0
 def test_get_training_task_params_no_workflow(self):
     # Requesting parameters for the UUID "invalid_uuid" (presumably one that
     # matches no workflow) is expected to raise AITrainingRequestError.
     self.assertRaises(
         AITrainingRequestError,
         ai_worker_api.get_training_task_params,
         "invalid_uuid"
     )