Exemplo n.º 1
0
    def __init__(self,
                 labels: Union[List[str], List[int]],
                 sequence_field: SequenceField,
                 label_namespace: str = 'labels') -> None:
        """Attach one label per element of ``sequence_field`` and validate them.

        Parameters
        ----------
        labels:
            Either all strings or all integers, one per sequence element.
            Integer labels are stored directly as the indexed labels.
        sequence_field:
            The field being labelled; only ``sequence_length()`` is used here.
        label_namespace:
            Namespace name stored for the labels. A warning is logged when it
            does not end with ``"tags"`` or ``"labels"``.

        Raises
        ------
        ConfigurationError
            If the number of labels differs from the sequence length, or if
            the labels are neither all ``str`` nor all ``int``.
        """
        self.labels = labels
        self.sequence_field = sequence_field
        self._label_namespace = label_namespace
        # Only populated here when the labels are already integers.
        self._indexed_labels = None

        recommended_suffixes = ("tags", "labels")
        if not self._label_namespace.endswith(recommended_suffixes):
            logger.warning("Your sequence label namespace was '%s'. We recommend you use a namespace "
                           "ending with 'tags' or 'labels', so we don't add UNK and PAD tokens by "
                           "default to your vocabulary.  See documentation for "
                           "`non_padded_namespaces` parameter in Vocabulary.", self._label_namespace)

        expected_length = sequence_field.sequence_length()
        if len(labels) != expected_length:
            raise ConfigurationError("Label length and sequence length "
                                     "don't match: %d and %d" % (len(labels), expected_length))

        # Integer labels need no vocabulary lookup; keep them as-is.
        if all(isinstance(label, int) for label in labels):
            self._indexed_labels = labels
            return

        if any(not isinstance(label, str) for label in labels):
            raise ConfigurationError("SequenceLabelFields must be passed either all "
                                     "strings or all ints. Found labels {} with "
                                     "types: {}.".format(labels, [type(x) for x in labels]))
Exemplo n.º 2
0
    def __init__(
        self,
        labels: Union[List[str], List[int]],
        sequence_field: SequenceField,
        label_namespace: str = "labels",
    ) -> None:
        """Attach one label per element of ``sequence_field`` and validate them.

        Parameters
        ----------
        labels:
            Either all strings or all integers, one per sequence element.
            Integer labels are stored as the indexed labels and mark the
            field as not needing indexing.
        sequence_field:
            The field being labelled; only ``sequence_length()`` is used here.
        label_namespace:
            Namespace name stored for the labels; also handed to
            ``_maybe_warn_for_namespace``.

        Raises
        ------
        ConfigurationError
            If the number of labels differs from the sequence length, or if
            the labels are neither all ``str`` nor all ``int``.
        """
        self.labels = labels
        self.sequence_field = sequence_field
        self._label_namespace = label_namespace
        self._indexed_labels = None
        self._maybe_warn_for_namespace(label_namespace)

        expected_length = sequence_field.sequence_length()
        if len(labels) != expected_length:
            raise ConfigurationError(
                "Label length and sequence length "
                "don't match: %d and %d" %
                (len(labels), expected_length))

        # True exactly when every label is already an integer id.
        self._skip_indexing = all(isinstance(label, int) for label in labels)
        if self._skip_indexing:
            self._indexed_labels = labels
        elif any(not isinstance(label, str) for label in labels):
            raise ConfigurationError(
                "SequenceLabelFields must be passed either all "
                "strings or all ints. Found labels {} with "
                "types: {}.".format(labels, [type(x) for x in labels]))
Exemplo n.º 3
0
    def __init__(
        self,
        indices: List[Tuple[int, int]],
        sequence_field1: SequenceField,
        sequence_field2: SequenceField,
        labels: List[str] = None,
        label_namespace: str = "labels",
        padding_value: int = -1,
    ) -> None:
        """Store (row, column) index pairs relating two sequence fields.

        Parameters
        ----------
        indices:
            Unique pairs whose first element must lie within the length of
            ``sequence_field1`` and whose second element must lie within the
            length of ``sequence_field2``.
        sequence_field1, sequence_field2:
            The two fields the indices refer to; only ``sequence_length()``
            is used here.
        labels:
            Optional labels, one per index pair.
        label_namespace:
            Namespace name stored for the labels; also handed to
            ``_maybe_warn_for_namespace``.
        padding_value:
            Stored as ``_padding_value``; not otherwise used in this method.

        Raises
        ------
        ConfigurationError
            If the indices contain duplicates, fall outside either sequence,
            or differ in number from ``labels`` when labels are given.
        """
        self.indices = indices
        self.labels = labels
        self.sequence_field1 = sequence_field1
        self.sequence_field2 = sequence_field2
        self._label_namespace = label_namespace
        self._padding_value = padding_value
        self._indexed_labels: Optional[List[int]] = None

        self._maybe_warn_for_namespace(label_namespace)
        row_count = sequence_field1.sequence_length()
        column_count = sequence_field2.sequence_length()

        has_duplicates = len(indices) != len(set(indices))
        if has_duplicates:
            raise ConfigurationError(
                f"Indices must be unique, but found {indices}")

        # An index is bad when either coordinate falls outside its sequence.
        out_of_range = any(
            not (0 <= index[1] < column_count) or not (0 <= index[0] < row_count)
            for index in indices)
        if out_of_range:
            raise ConfigurationError(
                f"Label indices and sequence length "
                f"are incompatible: {indices} and {row_count}, {column_count}")

        if labels is None:
            return
        if len(labels) != len(indices):
            raise ConfigurationError(
                f"Labelled indices were passed, but their lengths do not match: "
                f" {labels}, {indices}")
Exemplo n.º 4
0
    def __init__(self, labels: List[Dict], sequence_field: SequenceField,
                 ontology: set) -> None:
        """Build per-element label and mask vectors over a sorted ontology.

        Parameters
        ----------
        labels:
            One dict per sequence element. Each dict maps an ontology key to
            a mapping with ``'value'`` and ``'confidence'`` entries. Keys not
            present in the ontology are ignored.
        sequence_field:
            The field being labelled; only ``sequence_length()`` is used here.
        ontology:
            Collection of keys; it is de-duplicated and sorted to fix the
            position of each key in the vectors.

        Raises
        ------
        ConfigurationError
            If the number of label dicts differs from the sequence length.
        """
        # Sort the ontology so every key has a stable vector position.
        self.ontology = sorted(set(ontology))
        self.ontology_to_idx = {key: idx for idx, key in enumerate(self.ontology)}
        self.idx_to_ontology = dict(enumerate(self.ontology))

        self.sequence_field = sequence_field

        # Validate before filling anything: with more labels than sequence
        # positions, writing by index would fail with an IndexError instead
        # of the intended ConfigurationError.
        sequence_length = sequence_field.sequence_length()
        if len(labels) != sequence_length:
            raise ConfigurationError(
                "Label length and sequence length "
                "don't match: %d and %d" %
                (len(labels), sequence_length))

        ontology_size = len(self.ontology)
        self.labels = []
        self.masks = []
        for label_dict in labels:
            label_vector = np.zeros(ontology_size)
            mask_vector = np.zeros(ontology_size)
            for key, entry in label_dict.items():
                # Skip everything that isn't in the ontology (dict lookup
                # rather than a linear scan of the sorted list).
                key_idx = self.ontology_to_idx.get(key)
                if key_idx is None:
                    continue
                label_vector[key_idx] = entry['value']
                mask_vector[key_idx] = entry['confidence']

            self.labels.append(label_vector)
            self.masks.append(mask_vector)
Exemplo n.º 5
0
    def __init__(
        self,
        labels: Sequence[Sequence[Union[str, int]]],
        sequence_field: SequenceField,
        label_namespace: str = "labels",
        skip_indexing: bool = False,
        num_labels: Optional[int] = None,
    ) -> None:
        """Attach a list of labels to every element of ``sequence_field``.

        Parameters
        ----------
        labels:
            One sub-sequence of labels per sequence element. Each
            sub-sequence must be all strings or all integers.
        sequence_field:
            The field being labelled; only ``sequence_length()`` is used here.
        label_namespace:
            Namespace name stored for the labels; also handed to
            ``_maybe_warn_for_namespace``.
        skip_indexing:
            When true (and ``labels`` is non-empty) the labels must be
            integers smaller than ``num_labels``; they are then stored
            directly as the label ids. When false, all labels must be
            strings.
        num_labels:
            Number of distinct labels; required when ``skip_indexing`` is
            true.

        Raises
        ------
        ConfigurationError
            If the number of label lists differs from the sequence length,
            a sub-sequence mixes types, or the labels do not match the
            ``skip_indexing`` / ``num_labels`` requirements.
        """
        self.labels = labels
        self.sequence_field = sequence_field
        self._label_namespace = label_namespace
        self._indexed_labels = None
        self._label_ids = None
        self._maybe_warn_for_namespace(label_namespace)
        self._num_labels = num_labels

        expected_length = sequence_field.sequence_length()
        if len(labels) != expected_length:
            raise ConfigurationError(
                "Label length and sequence length "
                "don't match: %d and %d" %
                (len(labels), expected_length))

        # Every sub-sequence must be homogeneous: all ints or all strings.
        for label_list in labels:
            all_ints = all(isinstance(x, int) for x in label_list)
            all_strs = all(isinstance(x, str) for x in label_list)
            if not (all_ints or all_strs):
                raise ConfigurationError(
                    "SequenceLabelFields must be passed either all "
                    "strings or all ints. Found labels {} with "
                    "types: {}.".format(label_list,
                                        [type(x) for x in label_list]))

        # Indexing is skipped only when explicitly requested and validated
        # below; an empty sub-sequence alone must not switch it on.
        self._skip_indexing = False
        if skip_indexing and self.labels:
            # The labels are nested, so the checks must look inside each
            # sub-sequence rather than at the sub-sequences themselves.
            flat_labels = [label for label_list in labels for label in label_list]
            if not all(isinstance(label, int) for label in flat_labels):
                raise ConfigurationError(
                    "In order to skip indexing, your labels must be integers. "
                    "Found labels = {}".format(labels))
            if not num_labels:
                raise ConfigurationError(
                    "In order to skip indexing, num_labels can't be None.")

            if not all(label < num_labels for label in flat_labels):
                raise ConfigurationError(
                    "All labels should be < num_labels. "
                    "Found num_labels = {} and labels = {} ".format(
                        num_labels, labels))

            self._label_ids = labels
            self._indexed_labels = labels
            self._skip_indexing = True
        else:
            for label_list in labels:
                if not all(isinstance(label, str) for label in label_list):
                    raise ConfigurationError(
                        "SequenceMultiLabelFields expects string labels if skip_indexing=False. "
                        "Found labels: {}".format(labels))
Exemplo n.º 6
0
    def __init__(self,
                 indices: List[Tuple[int, int, int]],
                 sequence_field: SequenceField,
                 labels: List[str] = None,
                 label_namespace: str = 'labels',
                 padding_value: int = -1) -> None:
        """Store index tuples that relate positions within one sequence field.

        Parameters
        ----------
        indices:
            Unique tuples; the first two elements of each tuple must lie
            within the length of ``sequence_field``. Further elements are
            not checked in this method.
        sequence_field:
            The field the indices refer to; only ``sequence_length()`` is
            used here.
        labels:
            Optional labels, one per index tuple.
        label_namespace:
            Namespace name stored for the labels; also handed to
            ``_maybe_warn_for_namespace``.
        padding_value:
            Stored as ``_padding_value``; not otherwise used in this method.

        Raises
        ------
        ConfigurationError
            If the indices contain duplicates, fall outside the sequence, or
            differ in number from ``labels`` when labels are given.
        """
        self.indices = indices
        self.labels = labels
        self.sequence_field = sequence_field
        self._label_namespace = label_namespace
        self._padding_value = padding_value
        self._indexed_labels: List[int] = None

        self._maybe_warn_for_namespace(label_namespace)
        sequence_length = sequence_field.sequence_length()

        has_duplicates = len(indices) != len(set(indices))
        if has_duplicates:
            raise ConfigurationError(
                f"Indices must be unique, but found {indices}")

        # An index is bad when either of its first two coordinates is
        # outside the sequence.
        out_of_range = any(
            not (0 <= index[1] < sequence_length)
            or not (0 <= index[0] < sequence_length)
            for index in indices)
        if out_of_range:
            raise ConfigurationError(
                f"Label indices and sequence length "
                f"are incompatible: {indices} and {sequence_length}")

        if labels is None:
            return
        if len(labels) != len(indices):
            raise ConfigurationError(
                f"Labelled indices were passed, but their lengths do not match: "
                f" {labels}, {indices}")
Exemplo n.º 7
0
    def __init__(self,
                 labels: Union[List[str], List[int]],
                 sequence_field: SequenceField,
                 label_namespace: str = 'labels') -> None:
        """Attach one label per element of ``sequence_field`` and validate them.

        Parameters
        ----------
        labels:
            Either all strings or all integers, one per sequence element.
            Integer labels are stored directly as the indexed labels.
        sequence_field:
            The field being labelled; only ``sequence_length()`` is used here.
        label_namespace:
            Namespace name stored for the labels; also handed to
            ``_maybe_warn_for_namespace``.

        Raises
        ------
        ConfigurationError
            If the number of labels differs from the sequence length, or if
            the labels are neither all ``str`` nor all ``int``.
        """
        self.labels = labels
        self.sequence_field = sequence_field
        self._label_namespace = label_namespace
        # Only populated here when the labels are already integers.
        self._indexed_labels = None
        self._maybe_warn_for_namespace(label_namespace)

        expected_length = sequence_field.sequence_length()
        if len(labels) != expected_length:
            raise ConfigurationError("Label length and sequence length "
                                     "don't match: %d and %d" % (len(labels), expected_length))

        # Integer labels need no vocabulary lookup; keep them as-is.
        if all(isinstance(label, int) for label in labels):
            self._indexed_labels = labels
            return

        if any(not isinstance(label, str) for label in labels):
            raise ConfigurationError("SequenceLabelFields must be passed either all "
                                     "strings or all ints. Found labels {} with "
                                     "types: {}.".format(labels, [type(x) for x in labels]))
Exemplo n.º 8
0
    def __init__(self,
                 tags: List[str],
                 sequence_field: SequenceField,
                 tag_namespace: str = 'tags') -> None:
        """Attach one tag per element of ``sequence_field``.

        Parameters
        ----------
        tags:
            One tag per sequence element.
        sequence_field:
            The field being tagged; only ``sequence_length()`` is used here.
        tag_namespace:
            Namespace name stored for the tags. A warning is logged when it
            does not end with ``"tags"``.

        Raises
        ------
        ConfigurationError
            If the number of tags differs from the sequence length.
        """
        self._tags = tags
        self._sequence_field = sequence_field
        self._tag_namespace = tag_namespace
        # Both start unset; nothing in this method fills them in.
        self._indexed_tags = None  # type: Optional[List[int]]
        self._num_tags = None  # type: Optional[int]

        namespace_is_recommended = self._tag_namespace.endswith("tags")
        if not namespace_is_recommended:
            logger.warning(
                "Your tag namespace was '%s'. We recommend you use a namespace "
                "ending with 'tags', so we don't add UNK and PAD tokens by "
                "default to your vocabulary.  See documentation for "
                "`non_padded_namespaces` parameter in Vocabulary.",
                self._tag_namespace)

        expected_length = sequence_field.sequence_length()
        tag_count = len(tags)
        if tag_count != expected_length:
            raise ConfigurationError(
                "Tag length and sequence length "
                "don't match: %d and %d" %
                (tag_count, expected_length))
    def __init__(
        self,
        labels: Sequence[Sequence[Union[str, int]]],
        sequence_field: SequenceField,
        label_namespace: str = "labels",
        skip_indexing: bool = False,
        num_labels: Optional[int] = None,
    ) -> None:
        """Attach a list of labels to every element of ``sequence_field``.

        Parameters
        ----------
        labels:
            One list of labels per sequence element. The labels must be
            either all integers (stored directly as label ids, indexing is
            skipped) or all strings.
        sequence_field:
            The field being labelled; only ``sequence_length()`` is used here.
        label_namespace:
            Namespace name stored for the labels; also handed to
            ``_maybe_warn_for_namespace``.
        skip_indexing:
            Not read by this constructor; whether indexing is skipped is
            inferred from the label types instead.
        num_labels:
            Stored as ``_num_labels``; not validated in this method.

        Raises
        ------
        ConfigurationError
            If the number of label lists differs from the sequence length,
            any element of ``labels`` is not a ``list``, or the labels are
            neither all integers nor all strings.
        """
        self.labels = labels
        self._sequence_field = sequence_field
        self._label_namespace = label_namespace
        self._label_ids = None
        self._maybe_warn_for_namespace(label_namespace)
        self._num_labels = num_labels

        expected_length = sequence_field.sequence_length()
        if len(labels) != expected_length:
            raise ConfigurationError(
                "Label length and sequence length "
                "don't match: %d and %d" %
                (len(labels), expected_length))

        if not all(isinstance(label_list, list) for label_list in labels):
            raise ConfigurationError(
                "SequenceMultiLabelFields expects a list-of-lists where each sublist contains the labels. "
                "Found labels: {}".format(labels))

        # Skip indexing only when *every* label in *every* sublist is an
        # int. Deciding per sublist would let one all-int (or empty) sublist
        # switch indexing off for string labels elsewhere and bypass the
        # string validation below.
        self._skip_indexing = len(labels) > 0 and all(
            isinstance(label, int)
            for label_list in labels
            for label in label_list)

        if self._skip_indexing:
            self._label_ids = labels
        else:
            for label_list in labels:
                if not all(isinstance(label, str) for label in label_list):
                    raise ConfigurationError(
                        "SequenceMultiLabelFields expects string labels if skip_indexing=False. "
                        "Found labels: {}".format(labels))