Exemplo n.º 1
0
def bioconcepts2pubtator_annotations(tag, index):
    """Bioconcepts to Annotations
    Specifically for bioconcepts2pubtator and converts each annotation
    into an annotation object that BioC can parse.

    The annotation's "type" infon is always copied from the tag. The
    identifier is stored under a key chosen from the type:
    "NCBI Gene" for Gene, "NCBI Species" for Species, "tmVar" for any
    type containing "Mutation" or "SNP". Every other type is treated as
    a (possibly pipe-separated) list of "PREFIX:ID" identifiers, where
    a missing prefix defaults to "MESH" and a missing identifier is
    recorded as "Unknown".

    Keyword Arguments:
    tag -- the annotation line that was parsed into an array; it is read
           through the keys "type", "tag_id", "start" and "term"
    index -- the id of each document specific annotation

    Returns the populated BioCAnnotation carrying a single BioCLocation.
    """

    annt = BioCAnnotation()
    annt.id = str(index)
    annt.infons["type"] = tag["type"]

    # A missing type is normalised to "" so the substring checks are safe.
    tag_type = tag["type"] or ""
    tag_id = tag["tag_id"]

    if tag_type == "Gene":
        annt.infons["NCBI Gene"] = tag_id

    elif tag_type == "Species":
        annt.infons["NCBI Species"] = tag_id

    elif "Mutation" in tag_type or "SNP" in tag_type:
        annt.infons["tmVar"] = tag_id

    elif not tag_id:
        # There is no identifier at all for this annotation.
        annt.infons["MESH"] = "Unknown"

    elif "|" in tag_id:
        # Several identifiers: write each one out under its own numbered key.
        for tag_num, single_id in enumerate(tag_id.split("|")):
            # partition() splits on the first colon only, so an identifier
            # that itself contains a colon no longer raises on unpacking.
            prefix, colon, value = single_id.partition(":")
            if colon:
                annt.infons["{} {}".format(prefix, tag_num)] = value
            else:
                # Some ids lack the MESH:#### form; store this single id
                # (not the whole pipe-joined string) under a MESH key.
                annt.infons["MESH {}".format(tag_num)] = single_id

    else:
        prefix, colon, value = tag_id.partition(":")
        if colon:
            annt.infons[prefix] = value
        else:
            # Some ids lack the MESH:#### form; default the prefix to MESH.
            annt.infons["MESH"] = tag_id

    location = BioCLocation()
    location.offset = str(tag["start"])
    # The length is derived from the term text rather than an end offset.
    location.length = str(len(tag["term"]))
    annt.locations.append(location)
    annt.text = tag["term"]
    return annt
def add_annotation(triple, annotation_id):
    """Build a BioCAnnotation of type 'MeSH term' from a triple.

    The triple is read positionally: index 0 is the start offset,
    index 1 the length and index 2 the matched term text.
    """
    term_text = triple[2]
    term_length = triple[1]
    term_offset = triple[0]

    # annotation element: text, id and the fixed type infon
    mesh_annotation = BioCAnnotation()
    mesh_annotation.text = term_text
    mesh_annotation.id = str(annotation_id)
    mesh_annotation.put_infon('type', 'MeSH term')

    # location element: length and offset are stored as strings
    mesh_location = BioCLocation()
    mesh_location.length = str(term_length)
    mesh_location.offset = str(term_offset)
    mesh_annotation.add_location(mesh_location)

    return mesh_annotation
Exemplo n.º 3
0
def add_annotation(triple, annotation_id):
    """Create a 'MeSH term' BioCAnnotation with one location.

    triple[0] is used as the offset, triple[1] as the length and
    triple[2] as the annotation text; annotation_id becomes the id.
    """
    term = triple[2]
    length = triple[1]
    offset = triple[0]

    # Fill in the annotation itself.
    result = BioCAnnotation()
    result.text = term
    result.id = str(annotation_id)
    result.put_infon('type', 'MeSH term')

    # Attach where the term was found; both values are stringified.
    where = BioCLocation()
    where.length = str(length)
    where.offset = str(offset)
    result.add_location(where)

    return result
Exemplo n.º 4
0
def brat2bioc_entity(bratentity: BratEntity) -> BioCAnnotation:
    """Convert a brat entity into a BioCAnnotation.

    The id, text and type are copied over, and every span in
    ``bratentity.locations`` becomes a BioCLocation whose offset is the
    span's begin and whose length is ``end - begin``.
    """
    bioc_entity = BioCAnnotation()
    bioc_entity.id = bratentity.id
    bioc_entity.text = bratentity.text
    bioc_entity.infons['type'] = bratentity.type

    for brat_span in bratentity.locations:
        span_length = brat_span.end - brat_span.begin
        bioc_location = BioCLocation(brat_span.begin, span_length)
        bioc_entity.add_location(bioc_location)

    return bioc_entity
 def to_bioc(self):
     """Return this entity as a BioCAnnotation.

     The annotation takes its type infon, text and (stringified) id from
     this object and gets a single location starting at ``self.start``
     whose length is the length of ``self.text``.
     """
     annotation = BioCAnnotation()
     annotation.infons['type'] = self.type
     annotation.text = self.text
     annotation.id = str(self.id)

     text_length = len(self.text)
     annotation.add_location(BioCLocation(self.start, text_length))
     return annotation
Exemplo n.º 6
0
 def __parse_annotation(self, tree):
     """Build a BioCAnnotation from an annotation element.

     Reads the element's ``id`` attribute, its infons (via
     ``__parse_infons``), the content of its ``text`` child and one
     BioCLocation per ``location`` child, with offset and length
     converted to int.
     """
     parsed = BioCAnnotation()
     parsed.id = tree.attrib['id']
     parsed.infons = self.__parse_infons(tree)
     parsed.text = tree.findtext('text')

     for location_elem in tree.findall('location'):
         offset = int(location_elem.attrib['offset'])
         length = int(location_elem.attrib['length'])
         parsed.add_location(BioCLocation(offset, length))

     return parsed
Exemplo n.º 7
0
 def __read_annotation(self, start_elem):
     """Consume parser events until the current annotation is closed.

     The id comes from ``start_elem``. Only 'end' events are acted on:
     ``text``, ``infon`` and ``location`` elements fill in the
     annotation, and the closing ``annotation`` element returns it.
     Raises RuntimeError if the events run out before that.
     """
     annotation = BioCAnnotation()
     annotation.id = start_elem.get('id')

     while self.__has_next():
         event, elem = self.__next_event()
         # Anything other than an element end carries no data for us.
         if event != 'end':
             continue

         closed_tag = elem.tag
         if closed_tag == 'text':
             annotation.text = elem.text
         elif closed_tag == 'infon':
             infon_key = elem.get('key')
             annotation.infons[infon_key] = elem.text
         elif closed_tag == 'location':
             offset = int(elem.get('offset'))
             length = int(elem.get('length'))
             annotation.add_location(BioCLocation(offset, length))
         elif closed_tag == 'annotation':
             return annotation

     raise RuntimeError("should not reach here")  # pragma: no cover