def take_stats(txt, ann, fn, stats, options):
    """Accumulate annotation statistics for one document.

    Parses brat-style standoff lines from `ann` (only "T" textbound and
    "N" normalization lines are handled; anything else is logged and
    skipped), updates the nested counters in `stats` in place, and
    records whether the document's spans are mutually consistent.

    Args:
        txt: document text (unused here; kept for interface compatibility).
        ann: standoff annotation content as a single string.
        fn: source filename, used only in log messages.
        stats: mapping of category key -> Counter-like mapping, updated
            in place.
        options: parsed options; `options.taxdata`, when not None,
            provides taxonomy rank/division lookups.
    """
    annotations = []
    for ln, line in enumerate(ann.splitlines(), start=1):
        if not line or line.isspace() or line[0] not in 'TN':
            info('skipping line {} in {}: {}'.format(ln, fn, line))
            # fix: without this `continue`, an empty line crashes on
            # line[0] below and other skipped lines hit the assert.
            continue
        if line[0] == 'T':
            id_, type_span, text = line.split('\t')
            type_, span = type_span.split(' ', 1)
            stats[ENTITY_TYPE][type_] += 1
            stats[ENTITY_TEXT][text] += 1
            stats[TEXT_BY_TYPE.format(type_)][text] += 1
            stats[TOTALS]['textbounds'] += 1
            # multi-segment ("fragmented") spans use ";" as separator
            if len(span.split(';')) > 1:
                stats[FRAGMENTED_SPAN][type_] += 1
            annotations.append(Textbound(id_, type_, span, text))
        elif line[0] == 'N':
            id_, type_rid_tid, text = line.split('\t')
            type_, rid, tid = type_rid_tid.split(' ')
            if (tid.startswith(TAXONOMY_PREFIX) and
                options.taxdata is not None):
                tax_id = tid[len(TAXONOMY_PREFIX):]
                rank = options.taxdata.get_rank(tax_id)
                if rank == '<UNKNOWN>':
                    stats[TAXONOMY_UNKNOWN][tax_id] += 1
                division = options.taxdata.get_division(tax_id)
                stats[TAXONOMY_RANK][rank] += 1
                stats[TAXONOMY_DIV][division] += 1
                stats[TAXONOMY_RANK_DIV]['/'.join([rank, division])] += 1
                stats[TEXT_BY_RANK.format(rank)][text] += 1
            stats[TOTALS]['normalizations'] += 1
        else:
            assert False, 'internal error'
    stats[TOTALS]['documents'] += 1

    # Span-level consistency: identical spans with conflicting types and
    # crossing spans make the document inconsistent; containment is
    # recorded but allowed.
    is_consistent = True
    overlapping = find_overlapping(annotations)
    for t1, t2 in overlapping:
        sorted_types = '{}-{}'.format(*sorted([t1.type, t2.type]))
        if t1.span_matches(t2):
            # fix: comparison was `==`, contradicting its own comment and
            # flagging same-type duplicates while ignoring true conflicts.
            if t1.type != t2.type:    # same span, different types
                is_consistent = False
                stats[SAME_SPAN][sorted_types] += 1
                stats[SAME_SPAN_TEXT][t1.text] += 1
        elif t1.contains(t2):
            stats[CONTAINMENT]['{} in {}'.format(t2.type, t1.type)] += 1
            stats[CONTAINMENT_TEXT]['{} in {}'.format(t2.text, t1.text)] += 1
        elif t2.contains(t1):
            stats[CONTAINMENT]['{} in {}'.format(t1.type, t2.type)] += 1
            stats[CONTAINMENT_TEXT]['{} in {}'.format(t1.text, t2.text)] += 1
        elif t1.span_crosses(t2):
            is_consistent = False
            stats[CROSSING_SPAN]['{}/{}'.format(t1.type, t2.type)] += 1
            stats[CROSSING_SPAN_TEXT]['{}/{}'.format(t1.text,
                                                     t2.text)] += 1
        else:
            assert False, 'internal error'
    if is_consistent:
        stats[CONSISTENCY]['consistent'] += 1
    else:
        stats[CONSISTENCY]['inconsistent'] += 1
def standoffs(self, index):
    """Return sentence annotations as a list of Standoff objects.

    Textbound ids are assigned sequentially starting at `index`; spans
    keep their document-level offsets while the surface string is taken
    from the sentence-local text.
    """
    results = []
    for offset, (type_, start, end) in enumerate(self.get_tagged()):
        # map document-relative offsets into this sentence's text
        local_start = start - self.base_offset
        local_end = end - self.base_offset
        surface = self.text[local_start:local_end]
        results.append(
            Textbound('T%d' % (index + offset), type_, start, end, surface))
    return results
def make_textbound(type_, span_str, text):
    """Create a Textbound covering the full extent of `span_str`.

    A fragmented span ("s1 e1;s2 e2;...") is merged into one contiguous
    (min start, max end) span, with a warning about the replacement.
    """
    fragments = []
    for frag in span_str.split(';'):
        frag_start, frag_end = (int(tok) for tok in frag.split())
        fragments.append((frag_start, frag_end))
    starts, ends = zip(*fragments)
    start, end = min(starts), max(ends)
    if len(fragments) > 1:
        warning('replacing fragmented span {} with {} {}'.format(
            span_str, start, end))
    return Textbound(generate_id('T'), type_, start, end, text)
def mentions_to_standoffs(mentions, options):
    """Convert mentions into standoff annotations.

    Mentions sharing (start, end, type, text) collapse into a single
    textbound that carries one normalization per grouped mention.
    Returns the combined list of Textbound and Normalization objects.
    """
    # Group mentions that denote the same textbound.
    by_span = defaultdict(list)
    for mention in mentions:
        key = (mention.start, mention.end, mention.typename, mention.text)
        by_span[key].append(mention)
    annotations = []
    t_number, n_number = 0, 0
    for (start, end, type_, text), members in sorted(by_span.items()):
        t_number += 1
        t_id = 'T{}'.format(t_number)
        annotations.append(Textbound(t_id, type_, start, end, text))
        for mention in members:
            n_number += 1
            name = get_norm_name(mention.serial, mention.text, options)
            # if we have a species name, add it to the norm text
            if mention.species:
                name = name + ' ({})'.format(mention.species)
            ref = get_norm_id(mention.serial,
                              'TAGGER:{}'.format(mention.serial), options)
            ref = rewrite_norm_id(ref, type_, mention.species)
            annotations.append(
                Normalization('N{}'.format(n_number), t_id, ref, name))
    return annotations