def _createannotationobjects(annotations):
    """
    Build one Annotation object per "T" annotation.

    Every value is expected to look like "<label> <offsets>\\t<text>", where
    the offsets are "start end" pairs and discontinuous fragments are
    separated by ";". Only "T" annotations are expected as input.

    :param annotations: (dict) maps annotation keys to raw "T" annotation
    strings.
    :return: (OrderedDict) maps each input key to its Annotation object, in
    the iteration order of the input; it has one entry per input entry.
    """
    result = OrderedDict()

    for ann_key, raw in annotations.items():
        columns = raw.split("\t")
        # Everything after the first tab is the annotated text.
        text = u" ".join(columns[1:])

        tokens = columns[0].split()
        label = tokens[0]

        # The remaining tokens are offsets; ";" separates the fragments.
        spans = []
        for fragment in u" ".join(tokens[1:]).split(";"):
            offsets = fragment.split()
            spans.append([int(offsets[0]), int(offsets[1])])

        result[ann_key] = Annotation(ann_key, text, spans, [label])

    return result
Exemplo n.º 2
0
def _parsespans(fields):
    """
    Convert offset tokens into a list of [start, end] integer pairs.

    :param fields: (list) string tokens holding the offsets; the tokens are
    joined with spaces and then split on ";" into fragments.
    :return: (list) one [start, end] list of ints per fragment.
    :raises ValueError: if an offset is not an integer.
    :raises IndexError: if a fragment holds fewer than two offsets.
    """
    spans = []
    for fragment in u" ".join(fields).split(";"):
        offsets = fragment.split()
        spans.append([int(offsets[0]), int(offsets[1])])
    return spans


def _createannotationobjects(annotations):
    """
    Create instances of the Annotation class for each of the "T" annotations.

    Input is assumed to only be "T" annotations. Two layouts are accepted:

    * "<label> <offsets>\\t<text>": offsets follow the label in the first
      tab-separated column (the layout written by brat).
    * "<label>\\t<start>\\t<end>...": start and end are the second and third
      tab-separated columns. This is only tried when the first layout cannot
      be parsed (the original code describes it as the BioNLP layout).

    :param annotations: (dict) dictionary of "T" annotations.
    :return: (OrderedDict) an ordered dictionary of Annotations objects, each
    with its ``type`` attribute set to 'T'. Length of this dictionary should
    be equal to the input dictionary.
    :raises ValueError: if neither layout yields integer offsets.
    :raises IndexError: if the annotation is empty or neither layout provides
    enough fields.
    """
    targets = OrderedDict()

    for key, t in annotations.items():
        columns = t.split("\t")
        # NOTE(review): in the fallback layout this text still contains the
        # two offset columns, because it is built from every column after the
        # first. Confirm whether that is intended.
        text = u" ".join(columns[1:])

        header = columns[0].split()
        label = header[0]

        try:
            spans = _parsespans(header[1:])
        except (ValueError, IndexError):
            # Only parsing failures trigger the fallback; a bare except would
            # also swallow KeyboardInterrupt and SystemExit.
            # Explicit indexing (not slicing) keeps the IndexError when the
            # offset columns are missing.
            spans = _parsespans(header[1:] + [columns[1], columns[2]])

        annotation = Annotation(key, text, spans, [label])
        annotation.type = 'T'
        targets[key] = annotation

    return targets
Exemplo n.º 3
0
def importxml(filename):
    """
    Import an XML file in the format created by this program.

    Used for persistency and to operate on RepoModels in memory. The root
    element must have exactly two children: the element holding the
    sentences followed by the element holding the annotations.

    :param filename: (string) the path to the file to be imported.
    :return: A tuple (anndict, sentobjects): an OrderedDict mapping annotation
    ids to Annotation objects, and a list of Sentence objects in document
    order.
    :raises ValueError: if the root element does not have exactly two
    children.
    """
    # Attributes handled explicitly; every other attribute is a label or link.
    reserved = ("id", "repr", "spans", "words")

    anndict = OrderedDict()
    sentobjects = []

    with codecs.open(filename, 'r', encoding='utf-8') as f:
        data = f.read()

    doc = etree.fromstring(data)

    # Elements are iterated directly: getchildren() is deprecated and no
    # longer exists in xml.etree.ElementTree on recent Python versions.
    sentences, annotations = list(doc)

    for sentence in sentences:
        # The sentence line is the text of its child elements, space-joined.
        line = " ".join([word.text for word in sentence])
        sentobjects.append(
            Sentence(key=sentence.get('id').split(".")[1],
                     line=line,
                     start=int(sentence.get("start"))))

    # First pass: create every annotation and collect the words it covers.
    for element in annotations:
        # The first three characters of the id attribute are dropped.
        ann_id = unicode(element.get('id')[3:])
        text = unicode(element.get('repr'))
        # "spans" holds comma-separated spans, each "start|end".
        spans = [[int(offset) for offset in span.split("|")]
                 for span in element.get('spans').split(",")]

        ann = Annotation(ann_id, text, spans)

        for start, end in ann.spans:
            for sentence in sentobjects:
                ann.words.extend(sentence.getwordsinspan(start, end))

        anndict[ann_id] = ann

    # Second pass: links look up other annotations in anndict, so every
    # annotation has to exist before links are resolved.
    for element in annotations:
        ann = anndict[unicode(element.get('id')[3:])]

        for name, value in element.attrib.items():
            if name in reserved:
                continue

            name, value = unicode(name), unicode(value)

            if name.startswith("link."):
                # The value is a whitespace-separated list of annotation ids.
                ann.links[name[5:]].extend(
                    [anndict[key[3:]] for key in value.split()])
            else:
                ann.labels[name].append(value)

    return anndict, sentobjects