def _parse_token(self, line: str) -> Token:
    """Turn one data line of a column-formatted file into a Token.

    The line is split on ``self.column_delimiter``; the text column becomes
    the token surface form, every other mapped column becomes a label, and
    the special SpaceAfter column ('-') clears ``whitespace_after``.

    :param line: one non-empty line of the column file
    :return: the parsed Token
    """
    fields: List[str] = re.split(self.column_delimiter, line)
    token = Token(fields[self.text_column])

    for column, column_name in self.column_name_map.items():
        # ignore mapped columns that this (possibly short) line does not have
        if len(fields) <= column:
            continue
        if column_name == self.SPACE_AFTER_KEY:
            # '-' in the SpaceAfter column means: no whitespace after this token
            if fields[column] == '-':
                token.whitespace_after = False
        elif column != self.text_column:
            token.add_label(column_name, fields[column])

    return token
def token_list_to_sentence(self, token_list: conllu.TokenList) -> Sentence:
    """Convert a parsed ``conllu.TokenList`` into a flair Sentence.

    Copies the "ner", "ner-2" and "lemma" annotations onto each token where
    present, honours ``SpaceAfter=No`` from the MISC column, and attaches
    sentence-level metadata: the sentence id, relation labels, and one
    "entity" span label per NER span found in any "ner*" layer.

    :param token_list: one sentence as parsed by the conllu library
    :return: the equivalent flair Sentence
    """
    sentence: Sentence = Sentence()

    # NOTE: removed an unused `token_idx` counter and dead commented-out code
    for conllu_token in token_list:
        token = Token(conllu_token["form"])

        if "ner" in conllu_token:
            token.add_label("ner", conllu_token["ner"])

        if "ner-2" in conllu_token:
            token.add_label("ner-2", conllu_token["ner-2"])

        if "lemma" in conllu_token:
            token.add_label("lemma", conllu_token["lemma"])

        # the MISC column may carry "SpaceAfter=No" to signal glued tokens
        if "misc" in conllu_token and conllu_token["misc"] is not None:
            space_after = conllu_token["misc"].get("SpaceAfter")
            if space_after == "No":
                token.whitespace_after = False

        sentence.add_token(token)

    if "sentence_id" in token_list.metadata:
        sentence.add_label("sentence_id", token_list.metadata["sentence_id"])

    if "relations" in token_list.metadata:
        for head_start, head_end, tail_start, tail_end, label in token_list.metadata["relations"]:
            # head and tail span indices are 1-indexed and end index is inclusive
            head = Span(sentence.tokens[head_start - 1 : head_end])
            tail = Span(sentence.tokens[tail_start - 1 : tail_end])
            sentence.add_complex_label("relation", RelationLabel(value=label, head=head, tail=tail))

    # determine all NER label types in sentence and add all NER spans as sentence-level labels
    ner_label_types = []
    for token in sentence.tokens:
        for annotation in token.annotation_layers.keys():
            if annotation.startswith("ner") and annotation not in ner_label_types:
                ner_label_types.append(annotation)

    for label_type in ner_label_types:
        spans = sentence.get_spans(label_type)
        for span in spans:
            sentence.add_complex_label("entity", label=SpanLabel(span=span, value=span.tag, score=span.score))

    return sentence
def __getitem__(self, index: int = 0) -> Sentence:
    """Return the sentence at position ``index``.

    In-memory mode returns the pre-parsed Sentence directly; otherwise the
    CoNLL-U file is re-opened, seeked to the byte offset recorded for this
    sentence (``self.indices``, built in ``__init__``), and the sentence is
    parsed on the fly.
    """
    if self.in_memory:
        sentence = self.sentences[index]
    else:
        with open(str(self.path_to_conll_file), encoding="utf-8") as file:
            # jump straight to the byte offset of this sentence's first line
            file.seek(self.indices[index])
            line = file.readline()

            sentence: Sentence = Sentence()
            while line:
                line = line.strip()
                fields: List[str] = re.split("\t+", line)
                if line == "":
                    # blank line terminates the sentence (once tokens were read)
                    if len(sentence) > 0:
                        break
                elif line.startswith("#"):
                    # comment / metadata line — skip
                    line = file.readline()
                    continue
                elif "." in fields[0]:
                    # decimal ID: CoNLL-U "empty node" — skip
                    line = file.readline()
                    continue
                elif "-" in fields[0]:
                    # range ID (e.g. "1-2"): multiword token line — skip
                    line = file.readline()
                    continue
                else:
                    # regular token line: FORM (col 1) with HEAD (col 6) and
                    # LEMMA/UPOS/XPOS/DEPREL in the standard CoNLL-U columns
                    token = Token(fields[1], head_id=int(fields[6]))
                    token.add_label("lemma", str(fields[2]))
                    token.add_label("upos", str(fields[3]))
                    token.add_label("pos", str(fields[4]))
                    token.add_label("dependency", str(fields[7]))

                    # MISC column may carry "SpaceAfter=No" for glued tokens
                    if len(fields) > 9 and 'SpaceAfter=No' in fields[9]:
                        token.whitespace_after = False

                    # FEATS column: pipe-separated key=value morphological features
                    for morph in str(fields[5]).split("|"):
                        if "=" not in morph:
                            continue
                        token.add_label(
                            morph.split("=")[0].lower(), morph.split("=")[1]
                        )

                    # NOTE(review): columns 10/11 appear to be a frame-annotation
                    # extension ("Y" flag + frame name) — confirm against the data
                    if len(fields) > 10 and str(fields[10]) == "Y":
                        token.add_label("frame", str(fields[11]))

                    sentence.add_token(token)

                line = file.readline()
    return sentence
def __init__(self, path_to_conll_file: Union[str, Path], in_memory: bool = True):
    """
    Instantiates a column dataset in CoNLL-U format.

    :param path_to_conll_file: Path to the CoNLL-U formatted file
    :param in_memory: If set to True, keeps full dataset in memory, otherwise does disk reads
    :raises FileNotFoundError: if the given path does not exist
    """
    if isinstance(path_to_conll_file, str):
        path_to_conll_file = Path(path_to_conll_file)
    # validate explicitly instead of `assert` (asserts are stripped under -O)
    if not path_to_conll_file.exists():
        raise FileNotFoundError(path_to_conll_file)

    self.in_memory = in_memory
    self.path_to_conll_file = path_to_conll_file
    self.total_sentence_count: int = 0

    if self.in_memory:
        self.sentences: List[Sentence] = []
    else:
        # byte offsets of each sentence start, for lazy reads in __getitem__
        self.indices: List[int] = []

    with open(str(self.path_to_conll_file), encoding="utf-8") as file:
        line = file.readline()
        position = 0
        sentence: Sentence = Sentence()
        while line:
            line = line.strip()
            fields: List[str] = re.split("\t+", line)
            if line == "":
                # blank line: close off the current sentence (if non-empty)
                if len(sentence) > 0:
                    self.total_sentence_count += 1
                    if self.in_memory:
                        self.sentences.append(sentence)
                    else:
                        self.indices.append(position)
                        # next sentence starts right after this blank line
                        position = file.tell()
                    sentence = Sentence()
            elif line.startswith("#"):
                # comment / metadata line — skip
                line = file.readline()
                continue
            elif "." in fields[0]:
                # decimal ID: CoNLL-U "empty node" — skip
                line = file.readline()
                continue
            elif "-" in fields[0]:
                # range ID (e.g. "1-2"): multiword token line — skip
                line = file.readline()
                continue
            else:
                # regular token line: FORM (col 1) with HEAD (col 6) and
                # LEMMA/UPOS/XPOS/DEPREL in the standard CoNLL-U columns
                token = Token(fields[1], head_id=int(fields[6]))
                token.add_label("lemma", str(fields[2]))
                token.add_label("upos", str(fields[3]))
                token.add_label("pos", str(fields[4]))
                token.add_label("dependency", str(fields[7]))

                # MISC column may carry "SpaceAfter=No" for glued tokens
                if len(fields) > 9 and 'SpaceAfter=No' in fields[9]:
                    token.whitespace_after = False

                # FEATS column: pipe-separated key=value morphological features
                for morph in str(fields[5]).split("|"):
                    if "=" not in morph:
                        continue
                    token.add_label(morph.split("=")[0].lower(), morph.split("=")[1])

                if len(fields) > 10 and str(fields[10]) == "Y":
                    token.add_label("frame", str(fields[11]))

                sentence.add_token(token)

            line = file.readline()

        # flush a trailing sentence not followed by a blank line
        if len(sentence.tokens) > 0:
            self.total_sentence_count += 1
            if self.in_memory:
                self.sentences.append(sentence)
            else:
                self.indices.append(position)
def token_list_to_sentence(self, token_list: conllu.TokenList) -> Sentence:
    """Construct a flair Sentence from a parsed ``conllu.TokenList``.

    Each configured annotation field is copied onto its token (dict-valued
    fields expand into one label per key/value pair), ``SpaceAfter=No`` in
    MISC clears the whitespace flag, and sentence-level metadata (sentence
    id, relations, NER spans) is attached afterwards.

    :param token_list: one sentence as parsed by the conllu library
    :return: the equivalent flair Sentence
    """
    sentence: Sentence = Sentence()

    # Build the sentence tokens and add the annotations.
    for raw_token in token_list:
        token = Token(raw_token["form"])

        for field in self.token_annotation_fields:
            field_value: Any = raw_token[field]
            if isinstance(field_value, dict):
                # For fields that contain key-value annotations,
                # we add the key as label type-name and the value as the label value.
                for key, value in field_value.items():
                    token.add_label(typename=key, value=str(value))
            else:
                token.add_label(typename=field, value=str(field_value))

        misc = raw_token.get("misc")
        if misc is not None:
            space_after: Optional[str] = misc.get("SpaceAfter")
            if space_after == "No":
                token.whitespace_after = False

        sentence.add_token(token)

    metadata = token_list.metadata

    if "sentence_id" in metadata:
        sentence.add_label("sentence_id", metadata["sentence_id"])

    if "relations" in metadata:
        for head_start, head_end, tail_start, tail_end, label in metadata["relations"]:
            # head and tail span indices are 1-indexed and end index is inclusive
            head_span = Span(sentence.tokens[head_start - 1:head_end])
            tail_span = Span(sentence.tokens[tail_start - 1:tail_end])
            sentence.add_complex_label(
                "relation",
                RelationLabel(value=label, head=head_span, tail=tail_span))

    # determine all NER label types in sentence and add all NER spans as sentence-level labels
    seen_ner_types = []
    for tok in sentence.tokens:
        for layer_name in tok.annotation_layers.keys():
            if layer_name.startswith("ner") and layer_name not in seen_ner_types:
                seen_ner_types.append(layer_name)

    for ner_type in seen_ner_types:
        for ner_span in sentence.get_spans(ner_type):
            sentence.add_complex_label(
                "entity",
                label=SpanLabel(span=ner_span, value=ner_span.tag, score=ner_span.score),
            )

    return sentence