Python Sentence примеры использования

Язык программирования: Python

Пространство имен/Пакет: bratreader.sentence

Класс/Тип: Sentence

Примеров на hotexamples.com: 5

Python Sentence — найдено 5 примеров. Это лучшие примеры кода на Python для bratreader.sentence.Sentence, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество подборки.

Основные методы

Показать Скрыть

Sentence(5)

Основные методы

Sentence (5)

Пример #1

Показать файл

Файл: annotationimporter.py Проект: WeSIG/Delta

def importann(pathtofile):
    """
    Import an annotation file together with its companion .txt file.

    The text file is located by replacing the extension of ``pathtofile``
    with ``.txt`` and is decoded as UTF-8.

    :param pathtofile: (string) the path handed to ``readannfile``; the
    companion text file must share its name up to the extension.
    :return: a tuple ``(sentences, text)``: a list with one Sentence per
    line of the text file, and the full text of the document as a string.
    """
    annotations = readannfile(pathtofile)
    path, _ = os.path.splitext(pathtofile)

    # Read the file exactly once, inside a context manager, so the handle is
    # always closed and the sentences and the text come from the same read.
    with open(path + ".txt", encoding='utf-8') as input_file:
        lines = input_file.readlines()

    sentences = []
    char_index = 0

    for sent_index, line in enumerate(lines):
        sentences.append(Sentence(sent_index, line, char_index))
        # Each line still carries its newline, so len(line) is the full
        # distance to the start of the next line.
        char_index += len(line)

    _join(annotations.values(), sentences)

    # Joining the untouched lines reproduces the file content verbatim.
    text = "".join(lines)

    return sentences, text

Пример #2

Показать файл

Файл: annotationimporter.py Проект: tonywangcn/bratreader

def importann(pathtofile):
    """
    Import an annotation file together with its companion .txt file.

    The text file is found by swapping the extension of ``pathtofile`` for
    ``.txt``. Its content is split on blank lines (two consecutive newline
    characters) and every resulting chunk becomes one Sentence.

    :param pathtofile: (string) the path handed to ``readannfile``; the
    companion text file must share its name up to the extension.
    :return: a list of Sentence objects, one per blank-line-separated chunk
    of the text file.
    """
    separator = '\n\n'

    annotations = readannfile(pathtofile)
    base = os.path.splitext(pathtofile)[0]

    # Splitting on the blank-line separator (instead of iterating lines)
    # keeps single newlines inside each chunk.
    with open(base + ".txt", encoding='utf8') as handle:
        chunks = handle.read().split(separator)

    sentences = []
    offset = 0
    for number, chunk in enumerate(chunks):
        sentences.append(Sentence(number, chunk, offset))
        # Skip past the chunk and the separator that followed it.
        offset += len(chunk) + len(separator)

    _join(annotations.values(), sentences)
    return sentences

Пример #3

Показать файл

def importann(pathtofile):
    """
    Import an annotation file together with its companion .txt file.

    The text file name is built by replacing the last dot-separated
    component of ``pathtofile`` with ``txt``.

    :param pathtofile: (string) the path handed to ``readannfile``.
    :return: a tuple ``(annotations, sentences)``: the object returned by
    ``readannfile`` and a list with one Sentence per line of the text.
    """
    annotations = readannfile(pathtofile)

    # Drop everything after the final "." and append "txt" in its place.
    stem_parts = pathtofile.split(".")[:-1]
    context = _readcontext(".".join(stem_parts + ["txt"]))

    sentences = []
    offset = 0
    for number, text in enumerate(context.splitlines()):
        sentences.append(Sentence(number, text, offset))
        # splitlines() strips the line terminator; the extra 1 presumably
        # stands in for it when computing the next start offset.
        offset = offset + len(text) + 1

    _couple(annotations.values(), sentences)

    return annotations, sentences

Пример #4

Показать файл

Файл: annotationimporter.py Проект: Gusyatnikova/argument-mining-rus

def importann(pathtofile):
    """
    Import an annotation file together with its companion .txt file.

    The text file is located by replacing the extension of ``pathtofile``
    with ``.txt``. It is decoded as UTF-8 and undecodable bytes are
    silently dropped (``errors='ignore'``).

    :param pathtofile: (string) the path handed to ``readannfile``; the
    companion text file must share its name up to the extension.
    :return: a list of Sentence objects, one per line of the text file.
    """
    annotations = readannfile(pathtofile)
    path, _ = os.path.splitext(pathtofile)

    sentences = []

    char_index = 0

    # The context manager guarantees the file handle is closed; the original
    # left it open by passing open(...) straight into enumerate().
    with open(path + ".txt", errors='ignore', encoding="utf-8") as input_file:
        for sent_index, line in enumerate(input_file):
            sentences.append(Sentence(sent_index, line, char_index))
            # NOTE(review): `line` already includes its trailing newline, so
            # the extra 1 advances one character further per line. This may
            # be deliberate (e.g. offsets computed on CRLF files) - confirm
            # against the annotation offsets before changing it.
            char_index += len(line) + 1

    _join(annotations.values(), sentences)
    return sentences

Пример #5

Показать файл

Файл: xml.py Проект: sajeewanbasuru/MSc

def importxml(filename):
    """
    Load an XML file written in this program's own format.

    The document root must have exactly two children: the sentences element
    followed by the annotations element.

    :param filename: (string) the path to the file to be imported.
    :return: a tuple ``(anndict, sentobjects)``: an OrderedDict mapping
    annotation ids to Annotation objects, and a list of Sentence objects.
    """
    with codecs.open(filename, 'r', encoding='utf-8') as handle:
        raw = handle.read()

    root = etree.fromstring(raw)
    sentence_root, annotation_root = root.getchildren()

    # Pass 1: rebuild every sentence from the text of its child elements.
    sentobjects = []
    for sent_node in sentence_root.getchildren():
        joined = " ".join([child.text for child in sent_node.getchildren()])
        sentence = Sentence(key=sent_node.get('id').split(".")[1],
                            line=joined,
                            start=int(sent_node.get("start")))
        sentobjects.append(sentence)

    # Pass 2: create the annotations and collect the words under each span.
    anndict = OrderedDict()
    for ann_node in annotation_root.getchildren():
        # The first three characters of the id attribute are a prefix that
        # is not part of the key.
        ann_id = unicode(ann_node.get('id')[3:])
        surface = unicode(ann_node.get('repr'))

        # "a|b,c|d" -> [[a, b], [c, d]]
        spans = []
        for piece in ann_node.get('spans').split(","):
            spans.append([int(bound) for bound in piece.split("|")])

        ann = Annotation(ann_id, surface, spans)

        for span in ann.spans:
            for sentence in sentobjects:
                start, end = span
                ann.words.extend(sentence.getwordsinspan(start, end))

        anndict[ann_id] = ann

    # Pass 3: resolve links and labels. This needs its own pass because a
    # link may point at an annotation that appears later in the file.
    structural = ["id", "repr", "spans", "words"]
    for ann_node in annotation_root.getchildren():
        ann = anndict[unicode(ann_node.get('id')[3:])]

        extras = {
            unicode(name): unicode(value)
            for name, value in ann_node.attrib.items()
            if name not in structural
        }

        for name, value in extras.items():
            if not name.startswith("link."):
                ann.labels[name].append(value)
                continue

            # Strip the "link." prefix from the attribute name; each target
            # id carries the same three-character prefix as above.
            bucket = ann.links[name[5:]]
            bucket.extend([anndict[ref[3:]] for ref in value.split()])

    return anndict, sentobjects