def from_standoff(cls, text, annotations, sentence_split=True,
                  discont_rule=None, overlap_rule=None,
                  filter_types=None, exclude_types=None,
                  tokenization_re=None, document_id=None):
    """Build a tagged document from raw text and its standoff annotations.

    The document is created from the text by ``cls.from_text`` and is then
    re-tagged from the textbounds parsed out of ``annotations``.

    Args:
        text: The document text.
        annotations: Standoff annotations, handed to ``parse_textbounds``.
        sentence_split: Forwarded positionally to ``cls.from_text``.
        discont_rule: Forwarded to ``parse_textbounds``.
        overlap_rule: Forwarded to ``eliminate_overlaps``.
        filter_types: When truthy, passed to ``filter_textbounds``
            (presumably the types to keep -- confirm against that helper).
        exclude_types: When truthy, passed to ``filter_textbounds`` with
            ``exclude=True``.
        tokenization_re: Forwarded to ``cls.from_text`` by keyword.
        document_id: When not ``None``, stored on the document as ``id``.

    Returns:
        The object produced by ``cls.from_text`` after ``retag_document``
        has been applied to it.
    """
    spans = parse_textbounds(annotations, discont_rule)

    # The document is built from the complete, unfiltered set of textbounds;
    # type filtering only affects the re-tagging step further down.
    doc = cls.from_text(text, sentence_split, spans,
                        tokenization_re=tokenization_re)
    if document_id is not None:
        doc.id = document_id

    # Narrow the textbounds by type before they are used for tagging.
    if filter_types:
        spans = filter_textbounds(spans, filter_types)
    if exclude_types:
        spans = filter_textbounds(spans, exclude_types, exclude=True)

    verify_textbounds(spans, text)
    spans = eliminate_overlaps(spans, overlap_rule)

    # Replace the initial tags with ones derived from the remaining spans.
    retag_document(doc, spans)
    return doc
def from_standoff_to_spert(cls, text, annotations, sentence_split=True,
                           discont_rule=None, overlap_rule=None,
                           filter_types=None, exclude_types=None,
                           tokenization_re=None, document_id=None):
    """Convert raw text and standoff annotations via ``convert_documents``.

    Textbounds and relations are parsed from ``annotations``, a document is
    created from the text by ``cls.from_text``, and the document, the
    (optionally filtered) textbounds and a relation lookup are handed to
    ``convert_documents``.

    Args:
        text: The document text.
        annotations: Standoff annotations, handed to ``parse_textbounds``.
        sentence_split: Forwarded positionally to ``cls.from_text``.
        discont_rule: Forwarded to ``parse_textbounds``.
        overlap_rule: Accepted but not used anywhere in this function.
        filter_types: When truthy, passed to ``filter_textbounds``
            (presumably the types to keep -- confirm against that helper).
        exclude_types: Passed to ``parse_textbounds`` as
            ``exclude_relations``; when truthy, also passed to
            ``filter_textbounds`` with ``exclude=True``.
        tokenization_re: Forwarded to ``cls.from_text`` by keyword.
        document_id: When not ``None``, stored on the document as ``id``.

    Returns:
        Whatever ``convert_documents`` returns for the document, the
        textbounds and the relation lookup.
    """
    # TODO: remove the workaround for excluding relations
    spans, relations = parse_textbounds(
        annotations, discont_rule, exclude_relations=exclude_types)

    # The document is built from the textbounds as parsed, before the
    # type filtering below is applied.
    doc = cls.from_text(text, sentence_split, spans,
                        tokenization_re=tokenization_re)
    if document_id is not None:
        doc.id = document_id

    if filter_types:
        spans = filter_textbounds(spans, filter_types)
    if exclude_types:
        spans = filter_textbounds(spans, exclude_types, exclude=True)

    verify_textbounds(spans, text)

    # Index each relation triple by its first argument.
    # NOTE(review): relations that share the same first argument overwrite
    # one another here, so only the last one is kept -- confirm that this
    # is what convert_documents expects.
    relation_dict = {}
    for relation, arg1, arg2 in relations:
        relation_dict[arg1] = (relation, arg1, arg2)

    return convert_documents(doc, spans, relation_dict)