def validate_config(self, config_string):
    """Check that a labeling config is compatible with existing project data.

    The config is first passed to ``self.validate_label_config``. If the
    instance has a ``summary`` attribute, three consistency checks follow,
    each comparing the config with what the summary has recorded:

    1. data columns: every object-tag field named in the config must be
       among ``summary.common_data_columns``;
    2. annotations: every ``'from_name|to_name|type'`` key in
       ``summary.created_annotations`` must still be declared in the config;
    3. labels: every label recorded in ``summary.created_labels`` must
       still exist under the same control tag in the config.

    Validation stops silently (returns ``None``) when there is no summary,
    when the config declares no data fields, or when it declares no
    control tags.

    Args:
        config_string: labeling config source to validate.

    Raises:
        ValidationError: if any of the three consistency checks fails.
        Anything raised by ``self.validate_label_config`` propagates as is.
    """
    self.validate_label_config(config_string)
    if not hasattr(self, 'summary'):
        return

    # validate data columns consistency
    fields_from_config = get_all_object_tag_names(config_string)
    if not fields_from_config:
        logger.debug('Data fields not found in labeling config')
        return
    fields_from_data = set(self.summary.common_data_columns)
    fields_from_data.discard(settings.DATA_UNDEFINED_NAME)
    # An empty data-column set means there is nothing to compare against.
    if fields_from_data and not fields_from_config.issubset(fields_from_data):
        different_fields = list(fields_from_config.difference(fields_from_data))
        raise ValidationError(f'These fields are not present in the data: {",".join(different_fields)}')

    # validate annotations consistency
    annotations_from_config = set(get_all_control_tag_tuples(config_string))
    if not annotations_from_config:
        logger.debug('Annotation schema is not found in config')
        return
    annotations_from_data = set(self.summary.created_annotations)
    if annotations_from_data and not annotations_from_data.issubset(annotations_from_config):
        different_annotations = list(annotations_from_data.difference(annotations_from_config))
        diff_lines = []
        for ann_tuple in different_annotations:
            # Keys are encoded as 'from_name|to_name|type'.
            from_name, to_name, t = ann_tuple.split('|')
            diff_lines.append(
                f'{self.summary.created_annotations[ann_tuple]} '
                f'with from_name={from_name}, to_name={to_name}, type={t}'
            )
        diff_str = '\n'.join(diff_lines)
        raise ValidationError(
            f'Created annotations are incompatible with provided labeling schema, '
            f'we found:\n{diff_str}'
        )

    # validate labels consistency
    labels_from_config = get_all_labels(config_string)
    created_labels = self.summary.created_labels
    for control_tag_from_data, labels_from_data in created_labels.items():
        # A tag without recorded labels can never conflict with the config.
        # Skipping it also avoids looking up a tag that may no longer exist
        # in the config, which would otherwise fail on a plain mapping.
        if not labels_from_data:
            continue

        # Check if labels created in annotations, and their control tag has been removed
        if control_tag_from_data not in labels_from_config:
            raise ValidationError(
                f'There are {sum(labels_from_data.values())} annotation(s) created with tag '
                f'"{control_tag_from_data}", you can\'t remove it'
            )

        labels_from_config_by_tag = set(labels_from_config[control_tag_from_data])
        used_labels = set(labels_from_data)
        if not used_labels.issubset(labels_from_config_by_tag):
            different_labels = list(used_labels.difference(labels_from_config_by_tag))
            diff_str = '\n'.join(
                f'{label} ({labels_from_data[label]} annotations)' for label in different_labels
            )
            raise ValidationError(f'These labels still exist in annotations:\n{diff_str}')
def validate_config(self, config_string, strict=False):
    """Check that a labeling config is compatible with existing project data.

    The config is first passed to ``self.validate_label_config``. If the
    instance has a ``summary`` attribute, the config is then compared with
    what the summary has recorded:

    1. data columns: every object-tag field named in the config (with any
       ``[...]`` suffix stripped) must be among
       ``summary.common_data_columns``;
    2. annotations: every ``'from_name|to_name|type'`` key in
       ``summary.created_annotations`` must still be declared in the config;
    3. labels: labels recorded in ``summary.created_labels`` are compared
       with the labels declared under the same control tag.

    Side effects: when ``self.num_tasks`` is 0 the summary is reset
    entirely; when ``self.num_annotations`` is 0 it is reset with
    ``tasks_data_based=False``. In both cases validation stops there.

    Args:
        config_string: labeling config source to validate.
        strict: when exactly ``True``, labels that are used in annotations
            but missing from a non-dynamic control tag raise an error;
            otherwise the mismatch is only logged as a warning.

    Raises:
        LabelStudioValidationErrorSentryIgnored: if a consistency check
            fails (label mismatches only in strict mode).
        Anything raised by ``self.validate_label_config`` propagates as is.
    """
    self.validate_label_config(config_string)
    if not hasattr(self, 'summary'):
        return

    if self.num_tasks == 0:
        logger.debug(f'Project {self} has no tasks: nothing to validate here. Ensure project summary is empty')
        self.summary.reset()
        return

    # validate data columns consistency
    fields_from_config = get_all_object_tag_names(config_string)
    if not fields_from_config:
        logger.debug('Data fields not found in labeling config')
        return

    # Repeater tag support
    fields_from_config = {field.split('[')[0] for field in fields_from_config}

    fields_from_data = set(self.summary.common_data_columns)
    fields_from_data.discard(settings.DATA_UNDEFINED_NAME)
    # An empty data-column set means there is nothing to compare against.
    if fields_from_data and not fields_from_config.issubset(fields_from_data):
        different_fields = list(fields_from_config.difference(fields_from_data))
        raise LabelStudioValidationErrorSentryIgnored(
            f'These fields are not present in the data: {",".join(different_fields)}'
        )

    if self.num_annotations == 0:
        logger.debug(
            f'Project {self} has no annotations: nothing to validate here. '
            'Ensure annotations-related project summary is empty'
        )
        self.summary.reset(tasks_data_based=False)
        return

    # validate annotations consistency
    annotations_from_config = set(get_all_control_tag_tuples(config_string))
    if not annotations_from_config:
        logger.debug('Annotation schema is not found in config')
        return
    annotations_from_data = set(self.summary.created_annotations)
    if annotations_from_data and not annotations_from_data.issubset(annotations_from_config):
        different_annotations = list(annotations_from_data.difference(annotations_from_config))
        diff_lines = []
        for ann_tuple in different_annotations:
            # Keys are encoded as 'from_name|to_name|type'.
            from_name, to_name, t = ann_tuple.split('|')
            diff_lines.append(
                f'{self.summary.created_annotations[ann_tuple]} '
                f'with from_name={from_name}, to_name={to_name}, type={t}'
            )
        diff_str = '\n'.join(diff_lines)
        raise LabelStudioValidationErrorSentryIgnored(
            f'Created annotations are incompatible with provided labeling schema, we found:\n{diff_str}'
        )

    # validate labels consistency
    labels_from_config, dynamic_label_from_config = get_all_labels(config_string)
    created_labels = self.summary.created_labels
    for control_tag_from_data, labels_from_data in created_labels.items():
        # A tag without recorded labels can never conflict with the config.
        # Skipping it also avoids looking up a tag that may no longer exist
        # in the config.
        if not labels_from_data:
            continue

        tag_has_static_labels = control_tag_from_data in labels_from_config
        tag_is_dynamic = control_tag_from_data in dynamic_label_from_config

        # Check if labels created in annotations, and their control tag has been removed
        if not tag_has_static_labels and not tag_is_dynamic:
            raise LabelStudioValidationErrorSentryIgnored(
                f'There are {sum(labels_from_data.values())} annotation(s) created with tag '
                f'"{control_tag_from_data}", you can\'t remove it'
            )

        # A dynamic-only tag has no entry in labels_from_config; treat it as
        # declaring no static labels instead of indexing a missing key.
        if tag_has_static_labels:
            labels_from_config_by_tag = set(labels_from_config[control_tag_from_data])
        else:
            labels_from_config_by_tag = set()

        used_labels = set(labels_from_data)
        if used_labels.issubset(labels_from_config_by_tag):
            continue

        different_labels = list(used_labels.difference(labels_from_config_by_tag))
        diff_str = '\n'.join(
            f'{label} ({labels_from_data[label]} annotations)' for label in different_labels
        )
        # Only an explicit strict=True turns the mismatch into an error, and
        # never for dynamic tags, whose label set is not fixed by the config.
        if (strict is True) and not tag_is_dynamic:
            raise LabelStudioValidationErrorSentryIgnored(
                f'These labels still exist in annotations:\n{diff_str}'
            )
        else:
            logger.warning(f'project_id={self.id} inconsistent labels in config and annotations: {diff_str}')