def validate_training_examples(rubric, examples):
    """
    Check a list of training examples against a serialized rubric.

    Every example must select an option for each rubric criterion that
    offers options, and every selected option must exist in the rubric.
    Criteria whose option list is empty (written-feedback-only criteria)
    do not have to appear in an example.

    Args:
        rubric (dict): Serialized rubric model.  Must provide a "criteria"
            list whose items carry a "name" and an "options" list of
            dicts that each carry a "name".
        examples (list): Serialized training examples.  Each one is passed
            to `validate_training_example_format`; if that check succeeds,
            its "options_selected" mapping (criterion name -> option name)
            is compared with the rubric.

    Returns:
        list of unicode: Translated error messages; empty if everything
        is valid.  Examples are numbered from 1 in the messages.  If the
        rubric cannot be parsed (KeyError / ValueError), or no criterion
        has any option, a single-item list is returned right away.

    NOTE(review): earlier documentation listed StudentTrainingRequestError
    and StudentTrainingInternalError as raised.  Nothing in this function
    raises them directly -- confirm whether a helper can.

    Example usage:

    >>> options = [
    ...     {"order_num": 0, "name": "poor", "explanation": "Poor job!", "points": 0},
    ...     {"order_num": 1, "name": "good", "explanation": "Good job!", "points": 1},
    ...     {"order_num": 2, "name": "excellent", "explanation": "Excellent job!", "points": 2},
    ... ]
    >>> rubric = {
    ...     "prompt": "Write an essay!",
    ...     "criteria": [
    ...         {
    ...             "order_num": 0,
    ...             "name": "vocabulary",
    ...             "prompt": "How varied is the vocabulary?",
    ...             "options": options,
    ...         },
    ...         {
    ...             "order_num": 1,
    ...             "name": "grammar",
    ...             "prompt": "How correct is the grammar?",
    ...             "options": options,
    ...         },
    ...     ],
    ... }
    >>> examples = [
    ...     {
    ...         'answer': u'Lorem ipsum',
    ...         'options_selected': {'vocabulary': 'good', 'grammar': 'excellent'},
    ...     },
    ...     {
    ...         'answer': u'Doler',
    ...         'options_selected': {'vocabulary': 'good', 'grammar': 'poor'},
    ...     },
    ... ]
    >>> errors = validate_training_examples(rubric, examples)

    """
    errors = []

    # Map each criterion name to the names of the options it offers.
    criteria_options = {}
    try:
        for criterion in rubric['criteria']:
            option_names = [unicode(option['name']) for option in criterion['options']]
            criteria_options[unicode(criterion['name'])] = option_names
    except (ValueError, KeyError):
        logger.warning("Could not parse serialized rubric", exc_info=True)
        return [_(u"Could not parse serialized rubric")]

    # Criteria with an empty option list only collect written feedback.
    # If that is true of every criterion there is nothing to train on.
    feedback_only = [name for name in criteria_options if not criteria_options[name]]
    if len(feedback_only) == len(criteria_options):
        return [_(
            u"When you include a student training assessment, "
            u"the rubric for the assessment must contain at least one criterion, "
            u"and each criterion must contain at least two options."
        )]

    for position, example in enumerate(examples, start=1):
        # Structural problems are reported as-is; the rubric comparison
        # is skipped for a malformed example.
        format_ok, format_problems = validate_training_example_format(example)
        if not format_ok:
            errors.extend([
                _(u"Example {example_number} has a validation error: {error}").format(
                    example_number=position, error=problem
                )
                for problem in format_problems
            ])
            continue

        selected = example['options_selected']

        # Each selected option must belong to a criterion in the rubric
        # and be one of that criterion's options.
        for criterion_name, option_name in selected.iteritems():
            if criterion_name not in criteria_options:
                errors.append(
                    _(
                        u"Example {example_number} has an extra option "
                        u"for \"{criterion_name}\""
                    ).format(
                        example_number=position,
                        criterion_name=criterion_name
                    )
                )
            elif option_name not in criteria_options[criterion_name]:
                errors.append(
                    _(
                        u"Example {example_number} has an invalid option "
                        u"for \"{criterion_name}\": \"{option_name}\""
                    ).format(
                        example_number=position,
                        criterion_name=criterion_name,
                        option_name=option_name
                    )
                )

        # Feedback-only criteria count as covered, so they are never
        # reported as missing.
        covered = set(selected.keys() + feedback_only)
        for criterion_name in set(criteria_options.keys()) - covered:
            errors.append(
                _(
                    u"Example {example_number} is missing an option "
                    u"for \"{criterion_name}\""
                ).format(
                    example_number=position,
                    criterion_name=criterion_name
                )
            )

    return errors
def validate_training_examples(rubric, examples):
    """
    Validate that the training examples match the rubric.

    Every example must select an option for each rubric criterion that
    offers options, and every selected option must exist in the rubric.
    A criterion whose option list is empty only collects written
    feedback, so no example can select a valid option for it; such
    criteria are not reported as missing.

    Args:
        rubric (dict): Serialized rubric model.  Must provide a "criteria"
            list whose items carry a "name" and an "options" list of
            dicts that each carry a "name".
        examples (list): List of serialized training examples.  Each one
            is passed to `validate_training_example_format`; if that check
            succeeds, its "options_selected" mapping (criterion name ->
            option name) is compared with the rubric.

    Returns:
        list of errors (unicode): Translated messages; empty if everything
        is valid.  Examples are numbered from 1 in the messages.  If the
        rubric cannot be parsed (KeyError / ValueError), or no criterion
        has any option, a single-item list is returned right away.

    NOTE(review): earlier documentation listed StudentTrainingRequestError
    and StudentTrainingInternalError as raised.  Nothing in this function
    raises them directly -- confirm whether a helper can.

    Example usage:

    >>> options = [
    ...     {"order_num": 0, "name": "poor", "explanation": "Poor job!", "points": 0},
    ...     {"order_num": 1, "name": "good", "explanation": "Good job!", "points": 1},
    ...     {"order_num": 2, "name": "excellent", "explanation": "Excellent job!", "points": 2},
    ... ]
    >>> rubric = {
    ...     "prompt": "Write an essay!",
    ...     "criteria": [
    ...         {
    ...             "order_num": 0,
    ...             "name": "vocabulary",
    ...             "prompt": "How varied is the vocabulary?",
    ...             "options": options,
    ...         },
    ...         {
    ...             "order_num": 1,
    ...             "name": "grammar",
    ...             "prompt": "How correct is the grammar?",
    ...             "options": options,
    ...         },
    ...     ],
    ... }
    >>> examples = [
    ...     {
    ...         'answer': u'Lorem ipsum',
    ...         'options_selected': {'vocabulary': 'good', 'grammar': 'excellent'},
    ...     },
    ...     {
    ...         'answer': u'Doler',
    ...         'options_selected': {'vocabulary': 'good', 'grammar': 'poor'},
    ...     },
    ... ]
    >>> errors = validate_training_examples(rubric, examples)

    """
    errors = []

    # Construct a list of valid options for each criterion
    try:
        criteria_options = {
            unicode(criterion['name']): [
                unicode(option['name']) for option in criterion['options']
            ]
            for criterion in rubric['criteria']
        }
    except (ValueError, KeyError):
        logger.warning("Could not parse serialized rubric", exc_info=True)
        return [_(u"Could not parse serialized rubric")]

    # Criteria without options are written-feedback only.  Requiring a
    # selection for them would make every example fail: omitting the
    # criterion reads as "missing", selecting anything reads as "invalid".
    criteria_without_options = set(
        criterion_name
        for criterion_name, option_names in criteria_options.iteritems()
        if not option_names
    )
    required_criteria = set(criteria_options) - criteria_without_options

    # With no scorable criterion at all there is nothing to train on,
    # so report that once instead of once per example.
    if not required_criteria:
        return [_(
            u"If your assignment includes a learner training step, "
            u"the rubric must have at least one criterion, "
            u"and that criterion must have at least one option."
        )]

    # Check each example
    for order_num, example_dict in enumerate(examples, start=1):

        # Check the structure of the example dict; a malformed example is
        # reported and not compared with the rubric.
        is_format_valid, format_errors = validate_training_example_format(example_dict)
        if not is_format_valid:
            errors.extend(
                _(u"Example {example_number} has a validation error: {error}").format(
                    example_number=order_num, error=error
                )
                for error in format_errors
            )
            continue

        # Check each selected option in the example (one per criterion)
        options_selected = example_dict['options_selected']
        for criterion_name, option_name in options_selected.iteritems():
            if criterion_name not in criteria_options:
                msg = _(
                    u"Example {example_number} has an extra option "
                    u"for \"{criterion_name}\""
                ).format(
                    example_number=order_num,
                    criterion_name=criterion_name
                )
                errors.append(msg)
            elif option_name not in criteria_options[criterion_name]:
                msg = _(
                    u"Example {example_number} has an invalid option "
                    u"for \"{criterion_name}\": \"{option_name}\""
                ).format(
                    example_number=order_num,
                    criterion_name=criterion_name,
                    option_name=option_name
                )
                errors.append(msg)

        # Check for missing criteria; only criteria that offer options
        # are required.
        for missing_criterion in required_criteria - set(options_selected):
            msg = _(
                u"Example {example_number} is missing an option "
                u"for \"{criterion_name}\""
            ).format(
                example_number=order_num,
                criterion_name=missing_criterion
            )
            errors.append(msg)

    return errors
def validate_training_examples(rubric, examples):
    """
    Check a list of training examples against a serialized rubric.

    Every example must select an option for each rubric criterion that
    offers options, and every selected option must exist in the rubric.
    Criteria whose option list is empty (written-feedback-only criteria)
    do not have to appear in an example.

    Args:
        rubric (dict): Serialized rubric model.  Must provide a "criteria"
            list whose items carry a "name" and an "options" list of
            dicts that each carry a "name".
        examples (list): Serialized training examples.  Each one is passed
            to `validate_training_example_format`; if that check succeeds,
            its "options_selected" mapping (criterion name -> option name)
            is compared with the rubric.

    Returns:
        list of str: Translated error messages; empty if everything is
        valid.  Examples are numbered from 1 in the messages.  If the
        rubric cannot be parsed (KeyError / ValueError), or no criterion
        has any option, a single-item list is returned right away.

    NOTE(review): earlier documentation listed StudentTrainingRequestError
    and StudentTrainingInternalError as raised.  Nothing in this function
    raises them directly -- confirm whether a helper can.

    Example usage:

    >>> options = [
    ...     {"order_num": 0, "name": "poor", "explanation": "Poor job!", "points": 0},
    ...     {"order_num": 1, "name": "good", "explanation": "Good job!", "points": 1},
    ...     {"order_num": 2, "name": "excellent", "explanation": "Excellent job!", "points": 2},
    ... ]
    >>> rubric = {
    ...     "prompts": [{"description": "Write an essay!"}],
    ...     "criteria": [
    ...         {
    ...             "order_num": 0,
    ...             "name": "vocabulary",
    ...             "prompt": "How varied is the vocabulary?",
    ...             "options": options,
    ...         },
    ...         {
    ...             "order_num": 1,
    ...             "name": "grammar",
    ...             "prompt": "How correct is the grammar?",
    ...             "options": options,
    ...         },
    ...     ],
    ... }
    >>> examples = [
    ...     {
    ...         'answer': {'parts': [{'text': u'Lorem ipsum'}]},
    ...         'options_selected': {'vocabulary': 'good', 'grammar': 'excellent'},
    ...     },
    ...     {
    ...         'answer': {'parts': [{'text': u'Doler'}]},
    ...         'options_selected': {'vocabulary': 'good', 'grammar': 'poor'},
    ...     },
    ... ]
    >>> errors = validate_training_examples(rubric, examples)

    """
    errors = []

    # Map each criterion name to the names of the options it offers.
    criteria_options = {}
    try:
        for criterion in rubric['criteria']:
            option_names = [str(option['name']) for option in criterion['options']]
            criteria_options[str(criterion['name'])] = option_names
    except (ValueError, KeyError):
        logger.warning("Could not parse serialized rubric", exc_info=True)
        return [_("Could not parse serialized rubric")]

    # Criteria with an empty option list only collect written feedback.
    # If that is true of every criterion there is nothing to train on.
    feedback_only = [name for name in criteria_options if not criteria_options[name]]
    if len(feedback_only) == len(criteria_options):
        return [_(
            "If your assignment includes a learner training step, "
            "the rubric must have at least one criterion, "
            "and that criterion must have at least one option."
        )]

    for position, example in enumerate(examples, start=1):
        # Structural problems are reported as-is; the rubric comparison
        # is skipped for a malformed example.
        format_ok, format_problems = validate_training_example_format(example)
        if not format_ok:
            errors.extend([
                _("Example {example_number} has a validation error: {error}").format(
                    example_number=position, error=problem
                )
                for problem in format_problems
            ])
            continue

        selected = example['options_selected']

        # Each selected option must belong to a criterion in the rubric
        # and be one of that criterion's options.
        for criterion_name, option_name in selected.items():
            if criterion_name not in criteria_options:
                errors.append(
                    _(
                        "Example {example_number} has an extra option "
                        "for \"{criterion_name}\""
                    ).format(
                        example_number=position,
                        criterion_name=criterion_name
                    )
                )
            elif option_name not in criteria_options[criterion_name]:
                errors.append(
                    _(
                        "Example {example_number} has an invalid option "
                        "for \"{criterion_name}\": \"{option_name}\""
                    ).format(
                        example_number=position,
                        criterion_name=criterion_name,
                        option_name=option_name
                    )
                )

        # Feedback-only criteria count as covered, so they are never
        # reported as missing.
        covered = set(list(selected) + feedback_only)
        for criterion_name in set(criteria_options.keys()) - covered:
            errors.append(
                _(
                    "Example {example_number} is missing an option "
                    "for \"{criterion_name}\""
                ).format(
                    example_number=position,
                    criterion_name=criterion_name
                )
            )

    return errors
def validate_training_examples(rubric, examples):
    """
    Validate that the training examples match the rubric.

    Every example must select an option for each rubric criterion that
    offers options, and every selected option must exist in the rubric.
    A criterion whose option list is empty only collects written
    feedback, so no example can select a valid option for it; such
    criteria are not reported as missing.

    Args:
        rubric (dict): Serialized rubric model.  Must provide a "criteria"
            list whose items carry a "name" and an "options" list of
            dicts that each carry a "name".
        examples (list): List of serialized training examples.  Each one
            is passed to `validate_training_example_format`; if that check
            succeeds, its "options_selected" mapping (criterion name ->
            option name) is compared with the rubric.

    Returns:
        list of errors (unicode): Translated messages; empty if everything
        is valid.  Examples are numbered from 1 in the messages.  If the
        rubric cannot be parsed (KeyError / ValueError), or no criterion
        has any option, a single-item list is returned right away.

    NOTE(review): earlier documentation listed StudentTrainingRequestError
    and StudentTrainingInternalError as raised.  Nothing in this function
    raises them directly -- confirm whether a helper can.

    Example usage:

    >>> options = [
    ...     {"order_num": 0, "name": "poor", "explanation": "Poor job!", "points": 0},
    ...     {"order_num": 1, "name": "good", "explanation": "Good job!", "points": 1},
    ...     {"order_num": 2, "name": "excellent", "explanation": "Excellent job!", "points": 2},
    ... ]
    >>> rubric = {
    ...     "prompt": "Write an essay!",
    ...     "criteria": [
    ...         {
    ...             "order_num": 0,
    ...             "name": "vocabulary",
    ...             "prompt": "How varied is the vocabulary?",
    ...             "options": options,
    ...         },
    ...         {
    ...             "order_num": 1,
    ...             "name": "grammar",
    ...             "prompt": "How correct is the grammar?",
    ...             "options": options,
    ...         },
    ...     ],
    ... }
    >>> examples = [
    ...     {
    ...         'answer': u'Lorem ipsum',
    ...         'options_selected': {'vocabulary': 'good', 'grammar': 'excellent'},
    ...     },
    ...     {
    ...         'answer': u'Doler',
    ...         'options_selected': {'vocabulary': 'good', 'grammar': 'poor'},
    ...     },
    ... ]
    >>> errors = validate_training_examples(rubric, examples)

    """
    errors = []

    # Construct a list of valid options for each criterion
    try:
        criteria_options = {
            unicode(criterion['name']): [
                unicode(option['name']) for option in criterion['options']
            ]
            for criterion in rubric['criteria']
        }
    except (ValueError, KeyError):
        # Log the untranslated text and let logging attach the traceback.
        # Formatting the translated unicode message into a byte-string
        # template raises UnicodeEncodeError on Python 2 as soon as the
        # translation contains a non-ASCII character.
        logger.warning("Could not parse serialized rubric", exc_info=True)
        return [_(u"Could not parse serialized rubric")]

    # Criteria without options are written-feedback only.  Requiring a
    # selection for them would make every example fail: omitting the
    # criterion reads as "missing", selecting anything reads as "invalid".
    criteria_without_options = set(
        criterion_name
        for criterion_name, option_names in criteria_options.iteritems()
        if not option_names
    )
    required_criteria = set(criteria_options) - criteria_without_options

    # With no scorable criterion at all there is nothing to train on,
    # so report that once instead of once per example.
    if not required_criteria:
        return [_(
            u"If your assignment includes a learner training step, "
            u"the rubric must have at least one criterion, "
            u"and that criterion must have at least one option."
        )]

    # NOTE(review): the messages below use positional "{}" placeholders,
    # which translators cannot reorder.  They are left as-is so existing
    # message ids keep matching; consider named placeholders.

    # Check each example
    for order_num, example_dict in enumerate(examples, start=1):

        # Check the structure of the example dict; a malformed example is
        # reported and not compared with the rubric.
        is_format_valid, format_errors = validate_training_example_format(example_dict)
        if not is_format_valid:
            errors.extend(
                _(u"Example {} has a validation error: {}").format(order_num, error)
                for error in format_errors
            )
            continue

        # Check each selected option in the example (one per criterion)
        options_selected = example_dict['options_selected']
        for criterion_name, option_name in options_selected.iteritems():
            if criterion_name not in criteria_options:
                msg = _(u"Example {} has an extra option for \"{}\"").format(
                    order_num, criterion_name
                )
                errors.append(msg)
            elif option_name not in criteria_options[criterion_name]:
                # Wrapped in _() like every other user-facing message here.
                msg = _(u"Example {} has an invalid option for \"{}\": \"{}\"").format(
                    order_num, criterion_name, option_name
                )
                errors.append(msg)

        # Check for missing criteria; only criteria that offer options
        # are required.
        for missing_criterion in required_criteria - set(options_selected):
            msg = _(u"Example {} is missing an option for \"{}\"").format(
                order_num, missing_criterion
            )
            errors.append(msg)

    return errors