def make_new_ot_morph_layer(old_text_obj,
                            new_text_obj,
                            new_layer='original_words_morph_analysis',
                            old_layer='ot_morph_analysis',
                            new_parent_layer='original_words'):
    '''Creates new 'original_words_morph_analysis' layer based on the old morph analysis layer.'''
    assert old_layer in old_text_obj.layers
    assert new_parent_layer in new_text_obj.layers
    assert new_layer not in new_text_obj.layers
    original_layer = old_text_obj[old_layer]
    assert 'normalized_text' not in original_layer.attributes
    parent_layer = new_text_obj[new_parent_layer]
    layer = Layer(name=new_layer,
                  text_object=new_text_obj,
                  attributes=('normalized_text', ) + original_layer.attributes,
                  parent=new_parent_layer,
                  ambiguous=True)
    assert len(parent_layer) == len(original_layer)
    for wid, parent_span in enumerate(parent_layer):
        old_morph_span = original_layer[wid]
        new_span = Span(base_span=parent_span.base_span, layer=layer)
        for ann in old_morph_span.annotations:
            new_annotation = {'normalized_text': parent_span.text}
            for a in layer.attributes:
                if a in ['start', 'end', 'text', 'normalized_text']:
                    continue
                new_annotation[a] = ann[a]
            new_span.add_annotation(Annotation(new_span, **new_annotation))
        layer.add_span(new_span)
    return layer
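# A minimal usage sketch (not part of the original snippet; `old_text` and
# `new_text` are assumed to be EstNLTK v1.6 Text objects over the same raw
# text, with the 'ot_morph_analysis' and 'original_words' layers already
# present, respectively):
#
#   morph_layer = make_new_ot_morph_layer(old_text, new_text)
#   new_text.add_layer(morph_layer)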
def make_new_sentences_layer(old_text_obj,
                             new_text_obj,
                             new_layer='original_sentences_flat',
                             old_layer='sentences'):
    '''Creates new 'original_sentences_flat' layer based on the old sentences layer.'''
    assert old_layer in old_text_obj.layers
    assert new_layer not in new_text_obj.layers
    original_layer = old_text_obj[old_layer]
    layer = Layer(name=new_layer,
                  text_object=new_text_obj,
                  attributes=original_layer.attributes,
                  parent=None,
                  ambiguous=False)
    layer.meta['desc'] = 'Original sentence tokenization from koondkorpus XML file.'
    for span in original_layer:
        attribs = {}
        layer.add_annotation((span.start, span.end), **attribs)
    return layer
Example #3
def test_intersection_more_elements(self):
    i = Intersection(
        self.adjectives(),
        self.ed_suffix_regex(),
        Layer('words'),
        self.ed_suffix()
    )
    matches = i.get_matches(self.text())
    self.assertListEqual(self.expected(), matches)
Example #4
def make_new_words_layer(old_text_obj,
                         new_text_obj,
                         new_layer='original_words',
                         old_layer='words'):
    '''Creates new 'original_words' layer based on the old words layer.'''
    assert old_layer in old_text_obj.layers
    assert new_layer not in new_text_obj.layers
    original_layer = old_text_obj[old_layer]
    layer = Layer(name=new_layer,
                  text_object=new_text_obj,
                  attributes=original_layer.attributes,
                  parent=None,
                  ambiguous=True)
    layer.meta['desc'] = 'Original word tokenization from koondkorpus XML file.'
    for span in original_layer:
        attribs = {'normalized_form': None}
        layer.add_annotation((span.start, span.end), **attribs)
    return layer
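# A minimal usage sketch, assuming EstNLTK v1.6 (the sentence is an
# arbitrary stand-in):
#
#   from estnltk import Text
#   old_text = Text('Tere, maailm!')
#   old_text.tag_layer(['words'])
#   new_text = Text(old_text.text)
#   new_text.add_layer(make_new_words_layer(old_text, new_text))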
def remove_attribs_from_layer(text, layer_name, new_layer_name,
                              remove_attribs):
    ''' Rewrites the given layer so that all attributes listed in remove_attribs
        are completely removed. Returns the new layer.
    '''
    new_attribs = [
        a for a in text[layer_name].attributes if a not in remove_attribs
    ]
    new_layer = Layer(
        name=new_layer_name,
        text_object=text,
        attributes=new_attribs,
        parent=text[layer_name].parent if text[layer_name].parent else None,
        ambiguous=text[layer_name].ambiguous)
    for span in text[layer_name]:
        for annotation in span.annotations:
            analysis = {attrib: annotation[attrib] for attrib in new_attribs}
            new_layer.add_annotation((span.start, span.end), **analysis)
    return new_layer
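# A minimal usage sketch, assuming EstNLTK v1.6 and its default
# 'morph_analysis' layer (the new layer name and the dropped attributes
# below are just an illustration):
#
#   from estnltk import Text
#   text = Text('Tere, maailm!')
#   text.tag_layer(['morph_analysis'])
#   slim = remove_attribs_from_layer(text, 'morph_analysis',
#                                    'morph_analysis_slim',
#                                    remove_attribs=['root_tokens', 'clitic'])
#   text.add_layer(slim)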
Example #6
def import_from_brat_folder(folder):
    assert os.path.isdir( folder ), \
        "(!) Invalid folder name {!r}.".format(folder)
    annotation_files = dict()
    for fname in os.listdir(folder):
        if fname.endswith(('.ann', '.txt')):
            name, ext = os.path.splitext(fname)
            fpath = os.path.join(folder, fname)
            if name not in annotation_files:
                annotation_files[name] = []
            annotation_files[name].append(fpath)
    # Check that both .ann and .txt exist
    for name, files in annotation_files.items():
        if len(files) != 2:
            has_ann = any(fname.endswith('.ann') for fname in files)
            has_txt = any(fname.endswith('.txt') for fname in files)
            if not has_txt:
                raise ValueError(
                    '(!) Annotations file {!r} is missing .txt part.'.format(
                        name))
            if not has_ann:
                raise ValueError(
                    '(!) Annotations file {!r} is missing .ann part.'.format(
                        name))
    text_objects = []
    for name in annotation_files:
        ann_file = [
            fname for fname in annotation_files[name] if fname.endswith('.ann')
        ][0]
        entity_annotations, rel_annotations = import_brat_annotations(ann_file)
        txt_file = [
            fname for fname in annotation_files[name] if fname.endswith('.txt')
        ][0]
        content = import_brat_text(txt_file)
        #
        #  Create text object and entity annotations
        #
        text_obj = Text(content)
        text_obj.meta['file'] = name
        brat_entities = \
            Layer('brat_entities', attributes=('brat_id',), text_object = text_obj)
        event_layer = \
            Layer('events', attributes=('brat_id', 'class', 'class_confidence', 'duration', 'duration_confidence', 'comment'), \
                            text_object = text_obj, enveloping='brat_entities')
        timex_layer = \
            Layer('timexes', attributes=('brat_id', 'tid', 'type', 'value', 'mod', 'anchor_time_id', 'comment'), \
                             text_object = text_obj, enveloping='brat_entities')
        entity_layer = \
            Layer('entities', attributes=('brat_id',), text_object = text_obj, enveloping='brat_entities')
        entity_id_to_loc_map = dict()
        for (entity_id, type, start, end, attribs) in entity_annotations:
            # Check that location strings are expected ones
            # Collect corrected locations
            corrected_locs = []
            if isinstance(start, int):
                corrected_start, delta = _calculate_corrected_start_and_delta(
                    content, start)
                snippet = content[corrected_start:end + delta]
                assert snippet == attribs['text'], \
                    f"(!) {name!r} has mismatching entity texts {snippet!r} vs {attribs['text']!r}"
                corrected_locs.append((corrected_start, end + delta))
            elif isinstance(start, list):
                if len(start) == len(attribs['text']):
                    for s_start, s_end, s_text in zip(start, end,
                                                      attribs['text']):
                        corrected_start, delta = _calculate_corrected_start_and_delta(
                            content, s_start)
                        snippet = content[corrected_start:s_end + delta]
                        assert snippet == s_text, \
                            f"(!) {name!r} has mismatching entity texts {snippet!r} vs {attribs['text']!r}"
                        corrected_locs.append((corrected_start, s_end + delta))
                elif len(start) < len(attribs['text']):
                    # Tricky case: there can be less entity locations than entity text strings
                    # (!) different number of entity texts ['oli', 'kõige', 'parem'] and start locs [1904, 1908]
                    assert len(start) == len(end)
                    for s_start, s_end in zip(start, end):
                        corrected_start, delta = _calculate_corrected_start_and_delta(
                            content, s_start)
                        snippet = content[corrected_start:s_end + delta]
                        assert any([s in snippet for s in attribs['text']]), \
                            f"(!) {name!r} has mismatching entity texts {snippet!r} vs {attribs['text']!r}"
                        corrected_locs.append((corrected_start, s_end + delta))
                else:
                    raise Exception(
                        '(!) Mismatching number of locations and texts in {!r}'
                        .format((entity_id, type, start, end, attribs)))
            # add base layer: brat entities
            for s_start, s_end in corrected_locs:
                brat_entities.add_annotation((s_start, s_end),
                                             **{'brat_id': entity_id})
            entity_id_to_loc_map[entity_id] = corrected_locs
            # add enveloping layers
            if type == 'Event':
                attribs['brat_id'] = entity_id
                event_layer.add_annotation(corrected_locs, **attribs)
            elif type == 'Timex':
                attribs['brat_id'] = entity_id
                timex_layer.add_annotation(corrected_locs, **attribs)
            elif type == 'Entity':
                attribs['brat_id'] = entity_id
                entity_layer.add_annotation(corrected_locs, **attribs)
        text_obj.add_layer(brat_entities)
        text_obj.add_layer(event_layer)
        text_obj.add_layer(timex_layer)
        text_obj.add_layer(entity_layer)
        #
        #  Add tlink relation annotations
        #
        relations_layer = \
            Layer('tlinks', attributes=('brat_id', 'a_text', 'rel_type', 'b_text', 'b_index'), \
                            text_object = text_obj, enveloping='brat_entities', ambiguous=True)
        for (rel_arg1, rel_type, rel_arg2, rel_id) in rel_annotations:
            if rel_type == 'has_Argument':
                continue
            assert rel_arg1 in entity_id_to_loc_map
            assert rel_arg2 in entity_id_to_loc_map
            arg1_loc = entity_id_to_loc_map[rel_arg1]
            arg2_loc = entity_id_to_loc_map[rel_arg2]
            # check if the relation needs to be reversed
            if arg1_loc[0] > arg2_loc[0]:
                # reverse the relation arguments
                arg1_loc, arg2_loc = arg2_loc, arg1_loc
                # change reltype
                if rel_type == 'AFTER':
                    rel_type = 'BEFORE'
                elif rel_type == 'BEFORE':
                    rel_type = 'AFTER'
                elif rel_type == 'INCLUDES':
                    rel_type = 'IS_INCLUDED'
                elif rel_type == 'IS_INCLUDED':
                    rel_type = 'INCLUDES'
            attribs = {}
            attribs['brat_id'] = rel_id
            attribs['rel_type'] = rel_type
            attribs['a_text'] = ' '.join([content[s:e] for s, e in arg1_loc])
            attribs['b_text'] = ' '.join([content[s:e] for s, e in arg2_loc])
            attribs['b_index'] = len(arg1_loc)
            relations_layer.add_annotation(arg1_loc + arg2_loc, **attribs)
        text_obj.add_layer(relations_layer)
        #
        #  Add has_Argument relations
        #
        arguments_layer = \
            Layer('event_arguments', attributes=('brat_id', 'a_text', 'rel_type', 'b_text', 'b_index'), \
                               text_object = text_obj, enveloping='brat_entities', ambiguous=True)
        for (rel_arg1, rel_type, rel_arg2, rel_id) in rel_annotations:
            if rel_type != 'has_Argument':
                continue
            assert rel_arg1 in entity_id_to_loc_map
            assert rel_arg2 in entity_id_to_loc_map
            arg1_loc = entity_id_to_loc_map[rel_arg1]
            arg2_loc = entity_id_to_loc_map[rel_arg2]
            # check if the relation needs to be reversed
            if arg1_loc[0] > arg2_loc[0]:
                # reverse the relation arguments
                arg1_loc, arg2_loc = arg2_loc, arg1_loc
                # change reltype
                rel_type = 'is_Argument_of'
            attribs = {}
            attribs['brat_id'] = rel_id
            attribs['rel_type'] = rel_type
            attribs['a_text'] = ' '.join([content[s:e] for s, e in arg1_loc])
            attribs['b_text'] = ' '.join([content[s:e] for s, e in arg2_loc])
            attribs['b_index'] = len(arg1_loc)
            arguments_layer.add_annotation(arg1_loc + arg2_loc, **attribs)
        text_obj.add_layer(arguments_layer)
        text_objects.append(text_obj)
    return text_objects
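# A minimal usage sketch (the folder name is hypothetical; it must contain
# matching NAME.txt / NAME.ann pairs, as validated above):
#
#   texts = import_from_brat_folder('brat_annotations')
#   for t in texts:
#       print(t.meta['file'], len(t['events']), len(t['timexes']))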
def read_from_tsv(path):
    texts = []
    tokens_tagger = WhiteSpaceTokensTagger()
    if os.path.isdir(path):
        for root, dirs, files in os.walk(path):
            (head, tail) = os.path.split(root)
            if len(files) > 0:
                for file in files:
                    if not file.endswith(".tsv"):
                        continue
                    with open(os.path.join(root, file),
                              encoding="utf-8") as fin:
                        reader = csv.reader(fin, delimiter='\t')
                        words = []
                        #Lines containing a word and its analysis
                        word = []
                        #Morphological analysis of the whole text.
                        morph_analysis = []
                        raw_text = ""
                        multiword_expressions = []
                        for index, row in enumerate(reader):
                            row[0] = row[0].strip()
                            #Check if the row has the correct number of elements.
                            #If there are fewer than 6, then it is probably an adverb, abbreviation etc.
                            #But if there are more, then something is wrong and the user has to be notified.
                            if len(row) > 6:
                                #If the elements after the 6th one contain nothing, then we can continue.
                                for x in row[6:]:
                                    x = x.strip()
                                    if x != "":
                                        sys.stderr.write(
                                            "Something is wrong with the following file: "
                                            + file +
                                            " In the following line: " +
                                            str(index + 1) + "\n" +
                                            "\t".join(row) + "\n")
                                        sys.exit(1)
                            #If the first element of a row is empty then it is an alternative analysis of a word.
                            if row[0] == "" and word:
                                word.append(row)
                            else:
                                if len(word) != 0:
                                    words.append(word)
                                #After appending the word into the words list let's initialize a new word.
                                word = [row]
                        #As the loop terminates before adding the last word into the list, let's do it now
                        words.append(word)
                        for word in words:
                            #Iterate over the analyses and check for manual fixes.
                            #Remove all other analyses if they exist.
                            type_of_fix = ""
                            for analysis in word:
                                #As it may be sometimes necessary to look at the whole line, join the elements of a row back together.
                                line = "\t".join(analysis)
                                if "¤" in line:
                                    word[0][1:] = [
                                        None, None, None, None, None
                                    ]
                                    word = [word[0]]

                                    type_of_fix = "No_correct_analysis_available"
                                    break
                                elif analysis[1].startswith("@"):
                                    word[0][1:] = analysis[1:]
                                    word = [word[0]]
                                    word[0][1] = word[0][1].strip("@")
                                    type_of_fix = "correct_analysis_provided"
                                    break
                                elif analysis[1].startswith("£"):
                                    word[0][1:] = analysis[1:]
                                    word = [word[0]]
                                    word[0][1] = word[0][1].strip("£")
                                    type_of_fix = "correct_analysis_not_provided"
                                    break
                                elif re.match("#[A-Üa-ü0-9]", analysis[1]):
                                    word[0][1:] = analysis[1:]
                                    word = [word[0]]
                                    word[0][1] = word[0][1].strip("#")
                                    type_of_fix = "correct_analysis_manually_added"
                                    break
                            analyses = []
                            for a in word:
                                analysis = {}
                                analysis['root'] = a[1]
                                #If it is an abbreviation, some fields may be missing.
                                #Sometimes tabs are also missing at the end of a line,
                                #so the last element has to be checked.
                                if a[-1] in ('Y', 'D', 'K'):
                                    analysis['partofspeech'] = a[-1]
                                    analysis['ending'] = ""
                                    analysis['form'] = ""
                                    analysis['clitic'] = ""
                                else:
                                    analysis['ending'] = a[2]
                                    analysis['clitic'] = a[3]
                                    analysis['partofspeech'] = a[4]
                                    analysis['form'] = a[5] if len(a) == 6 else ""
                                if analysis['root'] is not None:
                                    (analysis['root'],
                                     analysis['root_tokens'],
                                     analysis['lemma']) = _postprocess_root(
                                        analysis['root'],
                                        analysis['partofspeech'])
                                else:
                                    analysis['root_tokens'] = None
                                    analysis['lemma'] = None
                                analysis['type_of_fix'] = type_of_fix
                                #If not otherwise specified, the normalized_text will remain the same as the word form
                                analysis['normalized_text'] = word[0][0]
                                analyses.append(analysis)
                            #if len(analyses) > 1:
                            #	print (analyses)
                            word_tuple = (word[0][0], analyses)
                            morph_analysis.append(word_tuple)
                            raw_text += word[0][0] + " "
                            if ' ' in word[0][0]:
                                multiword_expressions.append(word[0][0])
                        text = Text(raw_text)

                        tokens_layer = tokens_tagger.make_layer(text)
                        multiword_expressions = [
                            mw.split() for mw in multiword_expressions
                        ]
                        compound_tokens_tagger = PretokenizedTextCompoundTokensTagger(
                            multiword_units=multiword_expressions)
                        compound_tokens_layer = compound_tokens_tagger.make_layer(
                            text, layers={'tokens': tokens_layer})
                        word_tagger = WordTagger()
                        words_layer = word_tagger.make_layer(
                            text,
                            layers={
                                'compound_tokens': compound_tokens_layer,
                                'tokens': tokens_layer
                            })
                        #text.tag_layer(['sentences'])
                        layer_morph = Layer(name='manual_morph',
                                            text_object=text,
                                            attributes=[
                                                'root', 'lemma', 'root_tokens',
                                                'ending', 'clitic',
                                                'partofspeech', 'form'
                                            ],
                                            ambiguous=True)
                        layer_fix = Layer(name='type_of_fix',
                                          text_object=text,
                                          attributes=['type_of_fix'],
                                          parent='manual_morph')

                        for ind, word in enumerate(words_layer):
                            #All analyses of one word share the same type_of_fix,
                            #so take it from the first analysis of the current word.
                            layer_fix.add_annotation(
                                (word.start, word.end),
                                type_of_fix=morph_analysis[ind][1][0]['type_of_fix'])
                            for analysis in morph_analysis[ind][1]:
                                layer_morph.add_annotation(
                                    (word.start, word.end), **analysis)
                        text.add_layer(layer_morph)
                        text.add_layer(layer_fix)
                        text.meta['id'] = file.split(".")[0]
                        text.meta['location'] = root.split(os.sep)[-1].lower()
                        texts.append(text)
    return texts
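# A minimal usage sketch (the path is hypothetical; it should point to a
# directory tree of .tsv files with tab-separated morphological analyses,
# one word plus its alternative analyses per block, as parsed above):
#
#   texts = read_from_tsv('manual_annotations')
#   for t in texts:
#       print(t.meta['id'], t.meta['location'], len(t['manual_morph']))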
Example #8
def create_new_text_obj( fname, metadata, cur_text_len, cur_tokens, cur_tok_id, \
                         raw_timexes, timexes_layer_name='gold_timexes' ):
    '''Based on the snapshot of data collected from the file, creates a
       new EstNLTK v1.6 Text object and populates it with metadata and a
       gold standard timexes layer. Returns the Text object.'''
    # Construct new text object
    text_str = ''.join(cur_tokens)
    assert len(text_str) == cur_text_len
    text_obj = Text(text_str)
    # Add metadata
    text_obj.meta['source_file'] = fname
    assert len(metadata) >= 1
    if len(metadata) > 1:
        print(
            'Warning! Unexpected number of metadata items {!r}. Using only the first.'
            .format(metadata))
    for (k, v) in metadata[0].items():
        text_obj.meta[k] = v
    text_obj.meta['_original_token_count'] = cur_tok_id
    # Add document creation date
    for timex in raw_timexes:
        if 'functionInDocument' in timex and \
            timex['functionInDocument'] == 'CREATION_TIME':
            assert 'value' in timex
            text_obj.meta['document_creation_time'] = timex['value']
            if 'comment' in timex:
                text_obj.meta['dct_comment'] = timex['comment']
            break
    # Add TIMEX-es layer
    timexes_layer = Layer(name=timexes_layer_name, \
                          attributes=('tid', 'type', 'value', 'temporal_function', 'anchor_time_id', \
                                      'mod', 'quant', 'freq', 'begin_point', 'end_point', 'part_of_interval', \
                                      'comment' ), \
                          text_object=text_obj,\
                          ambiguous=False)
    for timex in raw_timexes:
        if '_start' in timex and '_end' in timex:
            # Determine if this TIMEX is part of an interval (without textual content)
            interval_timex, place_in_interval = get_parent_of_interval(
                timex, raw_timexes)
            if interval_timex:
                if interval_timex.get('type', None) == 'DURATION':
                    # Record interval timex as an implicit timex
                    interval_timex_odict = convert_timex_to_ordered_dict(
                        interval_timex)
                    timex['part_of_interval'] = interval_timex_odict
                else:
                    raise Exception(
                        '(!) Unexpected interval_timex {!r} for timex {!r}'.
                        format(interval_timex, timex))
            # Determine if this TIMEX is an implicit interval that has explicit timepoints
            # in text. If so, skip it to avoid duplicates in annotations
            if is_removable_interval_timex(timex, raw_timexes):
                continue
            # Determine if this is an explicit interval with one or more implicit time points
            # If so, then attach the implicit time points as OrderedDict-s
            begin_point_tmx, end_point_tmx = get_child_timepoints(
                timex, raw_timexes, only_implicit=True)
            if begin_point_tmx:
                begin_point_odict = convert_timex_to_ordered_dict(
                    begin_point_tmx)
                timex['beginPoint'] = begin_point_odict
            if end_point_tmx:
                end_point_odict = convert_timex_to_ordered_dict(end_point_tmx)
                timex['endPoint'] = end_point_odict
            # Determine exact position of the timex:
            if 'text' not in timex:
                # Timexes without pre-specified textual position/substring:
                #  _start and _end provide all the information we need
                loc = (timex['_start'], timex['_end'])
                annotations = convert_timex_attributes(copy.deepcopy(timex))
                for k in annotations:
                    if k not in timexes_layer.attributes:
                        raise Exception(
                            '(!) Unexpected key {!r} in {!r}'.format(
                                k, annotations))
                timexes_layer.add_annotation(loc, **annotations)
            else:
                # Timexes with pre-specified textual position/substring:
                #  we need to detect exact indexes of position in text
                loc = (timex['_start'], timex['_end'])
                textual_content = timex['text']
                timex_span = text_obj.text[loc[0]:loc[1]]
                if re.sub(r'\s+', '', textual_content) == timex_span:
                    # A) strings match if spaces are removed from text, e.g.
                    #    text="31. 12. 1997.a."  vs token="31.12.1997.a."
                    loc = (timex['_start'], timex['_end'])
                    annotations = convert_timex_attributes(
                        copy.deepcopy(timex))
                    for k in annotations:
                        if k not in timexes_layer.attributes:
                            raise Exception(
                                '(!) Unexpected key {!r} in {!r}'.format(
                                    k, annotations))
                    timexes_layer.add_annotation(loc, **annotations)
                elif re.sub(r'\s+', '',
                            textual_content) == re.sub(r'\s+', '', timex_span):
                    # B) strings match if spaces are removed from both text and token, e.g.
                    #    text="täna kell 19. 08"  vs token="täna kell 19.08"
                    loc = (timex['_start'], timex['_end'])
                    annotations = convert_timex_attributes(
                        copy.deepcopy(timex))
                    for k in annotations:
                        if k not in timexes_layer.attributes:
                            raise Exception(
                                '(!) Unexpected key {!r} in {!r}'.format(
                                    k, annotations))
                    timexes_layer.add_annotation(loc, **annotations)
                elif textual_content in timex_span:
                    # C) text is a substring of the phrase, e.g.
                    #    text="1899-"  vs  token="1899-1902"
                    i = text_obj.text.find(textual_content, timex['_start'])
                    if i > -1 and i + len(textual_content) <= loc[1]:
                        new_start = i
                        new_end = i + len(textual_content)
                        assert text_obj.text[
                            new_start:new_end] == textual_content
                        loc = (new_start, new_end)
                        annotations = convert_timex_attributes(
                            copy.deepcopy(timex))
                        for k in annotations:
                            if k not in timexes_layer.attributes:
                                raise Exception(
                                    '(!) Unexpected key {!r} in {!r}'.format(
                                        k, annotations))
                        timexes_layer.add_annotation(loc, **annotations)
                    else:
                        raise Exception(
                            '(!) Unable to detect location of the timex {!r}'.
                            format(timex))
                else:
                    # D) Tricky situation: text only overlaps the phrase.
                    #    So, we must find out its true indexes in text.
                    i = 0
                    candidate_locs = []
                    while text_obj.text.find(textual_content, i) > -1:
                        i = text_obj.text.find(textual_content, i)
                        j = i + len(textual_content)
                        if locations_overlap(timex['_start'], timex['_end'], i,
                                             j):
                            # if there is an overlap between the token location
                            # and timex location, then we have a candidate
                            if [i, j] not in candidate_locs:
                                candidate_locs.append([i, j])
                        i = j
                    if len(candidate_locs) == 0:
                        # Try searching again with spaces removed
                        textual_content = re.sub(r'\s+', '', textual_content)
                        i = 0
                        while text_obj.text.find(textual_content, i) > -1:
                            i = text_obj.text.find(textual_content, i)
                            j = i + len(textual_content)
                            if locations_overlap(timex['_start'],
                                                 timex['_end'], i, j):
                                # if there is an overlap between the token location
                                # and timex location, then we have a candidate
                                if [i, j] not in candidate_locs:
                                    candidate_locs.append([i, j])
                            i = j
                    if len(candidate_locs) == 1:
                        # Exactly one location: all clear!
                        new_start = candidate_locs[0][0]
                        new_end = candidate_locs[0][1]
                        assert text_obj.text[
                            new_start:new_end] == textual_content
                        loc = (new_start, new_end)
                        annotations = convert_timex_attributes(
                            copy.deepcopy(timex))
                        for k in annotations:
                            if k not in timexes_layer.attributes:
                                raise Exception(
                                    '(!) Unexpected key {!r} in {!r}'.format(
                                        k, annotations))
                        timexes_layer.add_annotation(loc, **annotations)
                    elif len(candidate_locs) > 1:
                        stretch = text_obj.text[
                            candidate_locs[0][0]:candidate_locs[-1][-1]]
                        raise Exception(
                            '(!) Multiple possible locations {!r} detected for the timex {!r} in {!r}'
                            .format(candidate_locs, timex, stretch))
                    else:
                        loc = (timex['_start'], timex['_end'])
                        print(text_obj.text[loc[0]:loc[1]])
                        raise Exception(
                            '(!) Unable to detect location of the timex {!r}'.
                            format(timex))
    text_obj.add_layer(timexes_layer)
    return text_obj
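# A minimal usage sketch (all arguments are hypothetical stand-ins for the
# snapshot collected while reading an annotated corpus file; each raw timex
# dict is expected to carry '_start'/'_end' offsets into the joined token
# string):
#
#   text_obj = create_new_text_obj(fname, metadata, cur_text_len,
#                                  cur_tokens, cur_tok_id, raw_timexes)
#   print(text_obj.meta.get('document_creation_time'))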