Пример #1
0
class Entity(RDFBase):
    """
    RDF entity that carries a list of type names on top of the attributes
    passed through to RDFBase (id, label, offset, confidence).
    """

    def __init__(self, id, label, types, offset=None, confidence=0.0):
        # type: (str, str, List[str], Optional[slice], float) -> None
        """
        Construct Entity Object

        Parameters
        ----------
        id: str
            URI of entity
        label: str
            Label of entity
        types: List[str]
            List of types for this entity; empty strings, None entries and
            duplicates are dropped
        offset: Optional[slice]
            Indices of substring where this entity was mentioned
        confidence: float
            Confidence value that this entity was mentioned
        """
        super(Entity, self).__init__(id, label, offset, confidence)

        # dict.fromkeys removes duplicates; the first occurrence of each type
        # is kept (in insertion order on Python 3.7+).
        self._types = list(dict.fromkeys(self._clean_types(types)))

    @staticmethod
    def _clean_types(types):
        # type: (List[str]) -> List[str]
        """Return the entries of `types` that are neither None nor ''."""
        return [t for t in types if t is not None and t != '']

    @property
    def types(self):
        # type: () -> List[str]
        """
        Types of this entity.

        Returns
        -------
        List[str]
            The internal list itself (not a copy)
        """
        return self._types

    @property
    def types_names(self):
        # type: () -> str
        """
        Types of this entity joined into a single phrase.

        Returns
        -------
        str
            The types separated by ' or ', skipping every type whose
            lowercase form is in NOT_TO_MENTION_TYPES
        """
        return ' or '.join(
            [t for t in self._types if t.lower() not in NOT_TO_MENTION_TYPES])

    def add_types(self, types):
        # type: (List[str]) -> None
        """
        Add types to this entity.

        Empty strings and None entries are ignored. Types that are already
        present (or repeated within `types`) are skipped, so the list stays
        free of duplicates just as it is after construction.

        Parameters
        ----------
        types: List[str]
            Types to add
        """
        known = set(self._types)
        for t in self._clean_types(types):
            if t not in known:
                known.add(t)
                # Append in place so references obtained through the `types`
                # property keep seeing the updated list.
                self._types.append(t)

    def casefold(self, format='triple'):
        # type: (str) -> None
        """
        Format the label and types to match triples or natural language

        Parameters
        ----------
        format: str
            'triple': lowercase everything, replace spaces by '-' in the label
            and by '_' in the types, and store the label as a Literal.
            'natural': lowercase everything and replace '_' by spaces; the
            label is capitalized when is_proper_noun(types) is true.
            Any other value leaves the entity unchanged.

        Returns
        -------
        None
            The entity is modified in place
        """
        if format == 'triple':
            # Label
            self._label = Literal(self.label.lower().replace(" ", "-"))
            # Types
            folded = [t.lower().replace(" ", "_") for t in self.types]
            # Normalisation can map distinct types onto the same string
            # (e.g. 'Person' and 'person'); keep one copy of each.
            self._types = list(dict.fromkeys(folded))

        elif format == 'natural':
            # Label
            label = self.label.lower().replace("_", " ")
            # is_proper_noun is given the types as they are before folding.
            if is_proper_noun(self.types):
                label = label.capitalize()
            self._label = label
            # Types
            folded = [t.lower().replace("_", " ") for t in self.types]
            # Same de-duplication as in the 'triple' branch.
            self._types = list(dict.fromkeys(folded))