Exemplo n.º 1
0
    def process_file(self, file_name):
        """
        Process a single EAF file.

        Attaches the lexicon reference, removes any previously referred
        lexicon, and writes the result either into the output directory
        or back to the original file.

        :param file_name: path (or URL) of the EAF file
        :return: None
        """
        try:
            print("File: " + file_name, file=sys.stderr)
            eaf = Eaf(file_name)
            eaf.add_lexicon_ref(LEXICON_REF, NAME, TYPE, URL, LEXICON_ID,
                                LEXICON_NAME, DATCAT_ID, DATCAT_NAME)

            # Drop the lexicon that was referred to before, if there was one
            ling_types = eaf.linguistic_types
            if LINGUISTIC_TYPE_ID in ling_types:
                lingtype = ling_types[LINGUISTIC_TYPE_ID]
                if "LEXICON_REF" in lingtype:
                    del eaf.lexicon_refs[lingtype["LEXICON_REF"]]

            ling_types[LINGUISTIC_TYPE_ID]["LEXICON_REF"] = LEXICON_REF

            if self.output_dir is None:
                eaf.to_file(file_name, pretty=True)
            else:
                out_name = os.path.basename(urlparse(file_name).path)
                eaf.to_file(self.output_dir + os.sep + out_name, pretty=True)
        except Exception:
            print("The EAF %s could not be processed." % file_name,
                  file=sys.stderr)
            print(sys.exc_info()[0])
Exemplo n.º 2
0
    def process_file(self, file_name):
        """
        Process a single EAF file.

        Finds the longest duration among the file's MPEG video media
        descriptors, adds annotations covering that duration, and writes
        the result into the output directory.

        :param file_name: path (or URL) of the EAF file
        :return: None
        """

        try:
            eaf = Eaf(file_name)
            videos = []
            for descriptor in eaf.media_descriptors:
                if descriptor['MIME_TYPE'] == 'video/mpeg':
                    url_path = urlparse(descriptor['MEDIA_URL']).path
                    videos.append(os.path.basename(url_path))
            duration = self.find_max_duration(videos)
            if duration == 0.0:
                print("Duration could not be determined.", file=sys.stderr)
            else:
                annotation_values = self.create_annotation_values(file_name)
                self.add_new_annotations(eaf, annotation_values, duration)
                out_name = os.path.basename(urlparse(file_name).path)
                eaf.to_file(self.output_dir + os.sep + out_name, pretty=True)
        except IOError:
            print("The EAF %s could not be processed." % file_name,
                  file=sys.stderr)
    def process_file(self, file_name):
        """
        Process a single EAF file.

        Registers the lexicon reference, deletes the lexicon it previously
        pointed at, and saves the file (to the output directory when one
        is configured, otherwise in place).

        :param file_name: path (or URL) of the EAF file
        :return: None
        """
        try:
            print("File: " + file_name, file=sys.stderr)
            eaf = Eaf(file_name)
            eaf.add_lexicon_ref(LEXICON_REF, NAME, TYPE, URL,
                                LEXICON_ID, LEXICON_NAME, DATCAT_ID, DATCAT_NAME)

            # Remove the lexicon reference that was in place before, if any
            types = eaf.linguistic_types
            if LINGUISTIC_TYPE_ID in types and "LEXICON_REF" in types[LINGUISTIC_TYPE_ID]:
                stale_ref = types[LINGUISTIC_TYPE_ID]["LEXICON_REF"]
                del eaf.lexicon_refs[stale_ref]

            types[LINGUISTIC_TYPE_ID]["LEXICON_REF"] = LEXICON_REF

            if self.output_dir is not None:
                destination = self.output_dir + os.sep + os.path.basename(urlparse(file_name).path)
                eaf.to_file(destination, pretty=True)
            else:
                eaf.to_file(file_name, pretty=True)
        except Exception:
            print("The EAF %s could not be processed." % file_name, file=sys.stderr)
            print(sys.exc_info()[0])
Exemplo n.º 4
0
    def process_file(self, file_name):
        """
        Process a single EAF file.

        Delegates the actual transformation to ``process_eaf`` and writes
        the modified document into the output directory.

        :param file_name: path (or URL) of the EAF file
        :return: None
        """
        try:
            eaf = Eaf(file_name)
            self.process_eaf(eaf, file_name)
            target = self.output_dir + os.sep + \
                os.path.basename(urlparse(file_name).path)
            eaf.to_file(target, pretty=True)
        except IOError:
            print("The EAF %s could not be processed." % file_name,
                  file=sys.stderr)
            print(sys.exc_info()[0])
Exemplo n.º 5
0
    def process_file(self, file_name):
        """
        Process a single EAF file.

        Applies ``process_eaf`` to the loaded document and stores the
        result in the output directory under the original base name.

        :param file_name: path (or URL) of the EAF file
        :return: None
        """
        try:
            eaf = Eaf(file_name)
            self.process_eaf(eaf, file_name)
            base_name = os.path.basename(urlparse(file_name).path)
            eaf.to_file(self.output_dir + os.sep + base_name, pretty=True)
        except IOError:
            print("The EAF %s could not be processed." % file_name,
                  file=sys.stderr)
            print(sys.exc_info()[0])
Exemplo n.º 6
0
    def process_file(self, file_name):
        """
        Process a single EAF file.

        Creates the gloss linguistic types, points the gloss tiers at
        them, registers the ASL Signbank ECV as an external reference and
        controlled vocabulary, links the CV to the new types, then saves
        to the output directory.

        :param file_name: path (or URL) of the EAF file
        :return: None
        """

        gloss_lingtypes = ["Gloss Child", "Gloss Adult"]
        external_ref = "ecv_ref"
        ecv_name = "ASL Signbank lexicon"

        try:
            eaf = Eaf(file_name)

            # Register the new linguistic types
            for lingtype in gloss_lingtypes:
                eaf.add_linguistic_type(lingtype, constraints=None)

            # Point each gloss tier at the matching linguistic type;
            # "Adult" wins if a tier name should mention both markers.
            gloss_tiers = self.find_gloss_tiers(eaf)
            for tier in gloss_tiers:
                for marker, lingtype in (("Adult", "Gloss Adult"),
                                         ("Child", "Gloss Child")):
                    if marker in tier:
                        eaf.tiers[tier][2]['LINGUISTIC_TYPE_REF'] = lingtype
                        break

            # External reference pointing at the ASL Signbank ECV
            eaf.add_external_ref(
                external_ref, "ecv",
                "http://applejack.science.ru.nl/asl-signbank/static/ecv/asl.ecv"
            )

            # Controlled vocabulary backed by that external reference
            eaf.add_controlled_vocabulary(ecv_name, external_ref)

            # Hook the CV up to both gloss linguistic types
            for lingtype in gloss_lingtypes:
                eaf.linguistic_types[lingtype]['CONTROLLED_VOCABULARY_REF'] = ecv_name

            out_name = os.path.basename(urlparse(file_name).path)
            eaf.to_file(self.output_dir + os.sep + out_name, pretty=True)
        except IOError:
            print("The EAF %s could not be processed." % file_name,
                  file=sys.stderr)
            print(sys.exc_info()[0])
Exemplo n.º 7
0
def make_elan(source_parent_dir, target_parent_dir):
    """
    Make ELAN files based on the filenames of WAV files.

    Written for the TIDIGITS corpus, so some steps are specific to that
    corpus' file naming scheme.

    :param source_parent_dir: directory tree containing the WAV files
    :param target_parent_dir: directory tree the EAF files are written to
    """

    for dirname, dirnames, filenames in os.walk(source_parent_dir):

        # Show every subdirectory as we reach it.
        for subdirname in dirnames:
            print(os.path.join(dirname, subdirname))

        # Process each WAV file.
        for filename in filenames:
            if '.wav' not in filename:
                continue

            parent, gender, child = dirname.split(os.path.sep)
            basename, ext = os.path.splitext(os.path.basename(filename))
            print(parent, gender, child, filename)

            source_path = os.path.join(source_parent_dir, gender, child)
            target_path = os.path.join(target_parent_dir, gender, child)

            if not os.path.exists(target_path):
                print(target_path)
                os.makedirs(target_path)

            # Audio duration in ms becomes the annotation's end timeslot.
            wav_path = os.path.join(source_path, filename)
            duration = int(librosa.get_duration(filename=wav_path) * 1000)

            # Space out the characters of the basename (minus the suffix
            # character) to form the annotation.
            annotation = " ".join(basename[:-1])
            # These spellings are specific to the TIDIGITS convention.
            annotation = annotation.replace("o", "oh").replace("z", "zero")

            text = re.sub(r"(\d+)",
                          lambda x: num2words.num2words(int(x.group(0))),
                          annotation)

            print(filename, duration, annotation, text)

            # Build and save the ELAN document.
            output_eaf = Eaf()
            output_eaf.add_tier('tx')
            output_eaf.insert_annotation('tx', 0, duration, text)
            output_eaf.add_linked_file(os.path.join(target_path, f'{basename}.wav'))

            output_eaf.to_file(os.path.join(target_path, f'{basename}.eaf'))
Exemplo n.º 8
0
    def process_file(self, file_name):
        """
        Process a single EAF file.

        Adds the NGT Signbank lexicon reference, points the "gloss"
        linguistic type at it, and writes the file into the output
        directory.

        :param file_name: path (or URL) of the EAF file
        :return: None
        """
        try:
            eaf = Eaf(file_name)
            lex_ref = "signbank-lexicon-ref"
            eaf.add_lexicon_ref(lex_ref, "NGT-Signbank", "Signbank", "https://signbank.science.ru.nl/",
                                "NGT", "NGT", "Annotation Id Gloss", "Annotation Id Gloss")
            eaf.linguistic_types["gloss"]["LEXICON_REF"] = lex_ref

            out_name = os.path.basename(urlparse(file_name).path)
            eaf.to_file(self.output_dir + os.sep + out_name, pretty=True)
        except IOError:
            print("The EAF %s could not be processed." % file_name,
                  file=sys.stderr)
            print(sys.exc_info()[0])
Exemplo n.º 9
0
    def process_file(self, file_name):
        """
        Process a single EAF file.

        Creates the "gloss-append" linguistic type, adds a child tier of
        that type under every gloss tier, and saves the result into the
        output directory.

        :param file_name: path (or URL) of the EAF file
        :return: None
        """

        gloss_append_lingtype = "gloss-append"

        try:
            eaf = Eaf(file_name)
            eaf.add_linguistic_type(gloss_append_lingtype,
                                    constraints="Symbolic_Association")
            gloss_tiers = self.find_gloss_tiers(eaf)
            self.add_gloss_tier_children(eaf, gloss_tiers,
                                         gloss_append_lingtype, file_name)
            out_name = os.path.basename(urlparse(file_name).path)
            eaf.to_file(self.output_dir + os.sep + out_name, pretty=True)
        except IOError:
            print("The EAF %s could not be processed." % file_name,
                  file=sys.stderr)
            print(sys.exc_info()[0])
Exemplo n.º 10
0
def make_elans(input_dir: str, output_dir: str, copy_wavs: bool):
    """
    Make ELAN files based on filenames of WAV files and annotation from a
    matching text file.

    :param input_dir: directory containing the TXT and WAV audio files
    :param output_dir: directory the EAF files are written into
    :param copy_wavs: whether to also copy each WAV file to the output dir
    """
    # Walk the input tree and process every WAV file found.
    for _, _, filenames in os.walk(input_dir):

        for filename in filenames:
            if '.wav' not in filename:
                continue

            basename, ext = os.path.splitext(os.path.basename(filename))
            print(basename)

            # Audio duration in ms -> the annotation's end timeslot
            duration = int(
                librosa.get_duration(
                    filename=os.path.join(input_dir, filename)) * 1000)

            # Annotation text comes from the TXT file with the same basename
            annotation = get_annotation(input_dir, basename)

            # Add any annotation cleaning here
            # annotation = re.sub(r"(\d+)", lambda x: num2words.num2words(int(x.group(0))), annotation)

            print(duration, annotation)

            # Build the EAF file; the 'default' tier is presumably created
            # by Eaf() itself (no add_tier call is needed here)
            output_eaf = Eaf()
            output_eaf.insert_annotation('default', 0, duration, annotation)
            output_eaf.add_linked_file(
                os.path.join(output_dir, f'{basename}.wav'))
            output_eaf.to_file(os.path.join(output_dir, f'{basename}.eaf'))

            # Optionally keep a copy of the audio next to the EAF
            if copy_wavs:
                shutil.copyfile(os.path.join(input_dir, filename),
                                os.path.join(output_dir, filename))
    print('>>> Done')
Exemplo n.º 11
0
    def process_file(self, file_name):
        """
        Process a single EAF file.

        Adds the gloss linguistic types, assigns them to the gloss tiers,
        registers the ASL Signbank ECV as an external reference plus a
        controlled vocabulary, connects the CV to the gloss types, and
        writes the file to the output directory.

        :param file_name: path (or URL) of the EAF file
        :return: None
        """

        gloss_lingtypes = ["Gloss Child", "Gloss Adult"]
        external_ref = "ecv_ref"
        ecv_name = "ASL Signbank lexicon"

        try:
            eaf = Eaf(file_name)

            # Register both gloss linguistic types
            for gloss_type in gloss_lingtypes:
                eaf.add_linguistic_type(gloss_type, constraints=None)

            # Assign a linguistic type to every gloss tier
            for tier_name in self.find_gloss_tiers(eaf):
                tier_attributes = eaf.tiers[tier_name][2]
                if "Adult" in tier_name:
                    tier_attributes['LINGUISTIC_TYPE_REF'] = "Gloss Adult"
                elif "Child" in tier_name:
                    tier_attributes['LINGUISTIC_TYPE_REF'] = "Gloss Child"

            # External reference pointing at the ASL Signbank ECV
            eaf.add_external_ref(external_ref, "ecv", "http://applejack.science.ru.nl/asl-signbank/static/ecv/asl.ecv")

            # Controlled vocabulary backed by the external reference
            eaf.add_controlled_vocabulary(ecv_name, external_ref)

            # Attach the CV to both gloss linguistic types
            for gloss_type in gloss_lingtypes:
                eaf.linguistic_types[gloss_type]['CONTROLLED_VOCABULARY_REF'] = ecv_name

            destination = self.output_dir + os.sep + os.path.basename(urlparse(file_name).path)
            eaf.to_file(destination, pretty=True)
        except IOError:
            print("The EAF %s could not be processed." % file_name,
                  file=sys.stderr)
            print(sys.exc_info()[0])
Exemplo n.º 12
0
def make_elans(spreadsheet: str, source: str, target: str):
    """
    Make ELAN files based on the filenames of WAV files.

    :param spreadsheet: path of the spreadsheet containing WAV filenames
        and their matching annotations
    :param source: directory containing the WAV audio files
    :param target: directory the EAF files are written into
    """

    # Load the spreadsheet into an annotation lookup
    print('Loading data from spreadsheet')
    annotations = get_annotations(spreadsheet)

    # Walk the source tree and build one EAF per WAV file
    print('Processing WAVs')
    for _, _, filenames in os.walk(source):

        for filename in filenames:
            if '.wav' not in filename:
                continue

            basename, ext = os.path.splitext(os.path.basename(filename))

            # Audio duration in ms -> the annotation's end timeslot
            duration = int(
                librosa.get_duration(
                    filename=os.path.join(source, filename)) * 1000)

            # Annotation matching this WAV file
            annotation = get_annotation(annotations, filename)

            # Add any annotation cleaning here
            # annotation = re.sub(r"(\d+)", lambda x: num2words.num2words(int(x.group(0))), annotation)

            print(filename, duration, annotation)

            # Build and save the EAF file
            output_eaf = Eaf()
            output_eaf.add_tier('tx')
            output_eaf.insert_annotation('tx', 0, duration, annotation)
            output_eaf.add_linked_file(os.path.join(target, f'{basename}.wav'))
            output_eaf.to_file(os.path.join(target, f'{basename}.eaf'))
    print('>>> Done')
Exemplo n.º 13
0
    def process_file(self, file_name):
        """
        Process a single EAF file.

        Looks up the longest duration among the file's MPEG videos, adds
        annotations spanning that duration, and writes the result to the
        output directory.

        :param file_name: path (or URL) of the EAF file
        :return: None
        """

        try:
            eaf = Eaf(file_name)
            videos = [
                os.path.basename(urlparse(md['MEDIA_URL']).path)
                for md in eaf.media_descriptors
                if md['MIME_TYPE'] == 'video/mpeg'
            ]
            duration = self.find_max_duration(videos)
            if duration == 0.0:
                print("Duration could not be determined.", file=sys.stderr)
            else:
                annotation_values = self.create_annotation_values(file_name)
                self.add_new_annotations(eaf, annotation_values, duration)
                out_name = os.path.basename(urlparse(file_name).path)
                eaf.to_file(self.output_dir + os.sep + out_name, pretty=True)
        except IOError:
            print("The EAF %s could not be processed." % file_name,
                  file=sys.stderr)
Exemplo n.º 14
0
    def process_file(self, file_name):
        """
        Process a single EAF file.

        Registers the "gloss-append" linguistic type, creates child tiers
        of that type under the gloss tiers, and saves the file into the
        output directory.

        :param file_name: path (or URL) of the EAF file
        :return: None
        """

        gloss_append_lingtype = "gloss-append"

        try:
            eaf = Eaf(file_name)
            eaf.add_linguistic_type(gloss_append_lingtype, constraints="Symbolic_Association")
            tiers = self.find_gloss_tiers(eaf)
            self.add_gloss_tier_children(eaf, tiers, gloss_append_lingtype, file_name)
            destination = self.output_dir + os.sep + os.path.basename(urlparse(file_name).path)
            eaf.to_file(destination, pretty=True)
        except IOError:
            print("The EAF %s could not be processed." % file_name,
                  file=sys.stderr)
            print(sys.exc_info()[0])
Exemplo n.º 15
0
def make_elans(input_dir: str, output_dir: str, copy_wavs: bool):
    """
    Make ELAN files from TIMIT-style TXT transcript files.

    Each ``.txt`` file under ``input_dir`` is expected to contain
    ``<start-sample> <end-sample> <transcript>``. For every transcript an
    EAF file named ``<subdir>-<basename>.eaf`` is written to
    ``output_dir``, with the speaker id taken from the containing
    directory name, and the matching WAV is copied alongside it when
    ``copy_wavs`` is set.

    :param input_dir: Directory name of folder containing TXT and WAV audio files
    :param output_dir: Directory name to save EAF files into
    :param copy_wavs: Setting whether or not to copy the WAV file to the output dir
    """
    # Sample rate used to convert sample offsets to milliseconds.
    # NOTE(review): assumes 16 kHz audio (true for TIMIT) — confirm
    # before using with another corpus.
    sample_rate = 16000

    files = glob.glob(f'{input_dir}/**/*.txt', recursive=True)
    print(files)

    for filename in files:

        filepath, ext = os.path.splitext(filename)
        basename = os.path.splitext(os.path.basename(filepath))[0]
        subdirname = os.path.basename(os.path.dirname(filepath))

        # The speaker directory encodes sex and speaker id, e.g. "fmem0":
        # SEX :== m | f
        # SPEAKER_ID :== <INITIALS><DIGIT>
        sex = subdirname[0]
        participant = subdirname[1:]

        # Read "<start> <end> <transcript...>" from the TXT file.
        with open(filename, 'r', encoding='utf-8') as text_file:
            annotation = text_file.read()
        annotation_split = annotation.split()

        # Convert BOTH boundaries from audio samples to milliseconds.
        # (Bug fix: previously only the end boundary was converted, so a
        # non-zero start was passed to add_annotation in raw samples.)
        start = int(int(annotation_split[0]) / sample_rate * 1000)
        end = int(int(annotation_split[1]) / sample_rate * 1000)
        annotation_text = " ".join(annotation_split[2:])

        # Add any annotation cleaning here
        # annotation = re.sub(r"(\d+)", lambda x: num2words.num2words(int(x.group(0))), annotation)

        print(start, end, annotation_text)

        # Build the EAF file
        output_eaf = Eaf()
        output_eaf.add_tier('default', part=participant)
        output_eaf.add_annotation('default', start, end, annotation_text)
        output_eaf.add_linked_file(
            os.path.join(output_dir, f'{subdirname}-{basename}.wav'))
        output_eaf.to_file(
            os.path.join(output_dir, f'{subdirname}-{basename}.eaf'))

        # Copy the WAV next to the EAF when requested.
        # (Bug fix: the copy previously ran unconditionally, ignoring the
        # copy_wavs flag documented in the signature.)
        if copy_wavs:
            shutil.copyfile(
                f'{filepath}.wav',
                os.path.join(output_dir, f'{subdirname}-{basename}.wav'))

    print('>>> Done')
Exemplo n.º 16
0
def main():
    """
    Merge tiers from two ELAN files into a new third file.

    File 1 has the utterance and utterance translation
    File 2 has the gloss
    File 3 is the destination
    """
    # Input files
    file_1 = 'input/file-1.eaf'
    file_2 = 'input/file-2.eaf'
    file_3 = 'input/new.eaf'

    # Tier names: *_source_tier names exist in the input files,
    # *_target_tier names are created in the output file.
    utterance_id_source_tier = "A_phrase-segnum-en"
    utterance_id_target_tier = "utterance_id"
    utterance_source_tier = "DDD_Transcription-txt-qaa-fonipa-x-eib"
    utterance_target_tier = "utterance"
    utterance_translation_source_tier = "DDD_Translation-gls-en"
    utterance_translation_target_tier = "utterance_translation"
    word_source_tier = "A_word-txt-qaa-fonipa-x-eib"
    word_target_tier = "grammatical_words"
    morph_source_tier = "A_morph-txt-qaa-fonipa-x-eib"
    gloss_source_tier = "A_morph-gls-en"
    gloss_target_tier = "gloss"

    # Set up the eaf objects
    eaf_1 = Eaf(file_1)
    eaf_2 = Eaf(file_2)
    eaf_3 = Eaf()

    # Remove default tier and copy media
    eaf_3.remove_tier("default")
    # eaf_3 = copy_media(eaf_1, eaf_3)
    """
    Copy annotation number tier from file 2
    tier-type default-lt
    <LINGUISTIC_TYPE GRAPHIC_REFERENCES="false" LINGUISTIC_TYPE_ID="default-lt" TIME_ALIGNABLE="true"/>
    """
    print("Copying annotation numbers from file 2")
    utterance_id_type_params = {
        'LINGUISTIC_TYPE_ID': 'default-lt',
        'TIME_ALIGNABLE': 'true'
    }
    utterance_id_tier_params = {
        'LINGUISTIC_TYPE_REF': 'default-lt',
        'TIER_ID': utterance_id_target_tier
    }
    _tier_copy(source_eaf=eaf_2,
               target_eaf=eaf_3,
               source_tier_name=utterance_id_source_tier,
               target_tier_name=utterance_id_target_tier,
               override_params=utterance_id_tier_params)
    """
    Copy utterance tier from file 1
    LINGUISTIC_TYPE_REF="Blank"
    <LINGUISTIC_TYPE CONSTRAINTS="Symbolic_Association" GRAPHIC_REFERENCES="false" LINGUISTIC_TYPE_ID="Blank" TIME_ALIGNABLE="false"/>
    """
    print("Copying utterance tier from file 1")
    blank_type_params = {
        'LINGUISTIC_TYPE_ID': 'Blank',
        'CONSTRAINTS': 'Symbolic_Association',
        'TIME_ALIGNABLE': 'false'
    }
    eaf_3.add_linguistic_type('Blank', param_dict=blank_type_params)
    utterance_tier_params = {
        'LINGUISTIC_TYPE_REF': 'Blank',
        'PARENT_REF': utterance_id_target_tier,
        'TIER_ID': utterance_target_tier
    }
    _tier_copy_to_ref(source_eaf=eaf_1,
                      target_eaf=eaf_3,
                      source_tier_name=utterance_source_tier,
                      target_tier_name=utterance_target_tier,
                      target_parent_tier_name=utterance_id_target_tier,
                      override_params=utterance_tier_params)
    """
    Copy utterance translation tier from file 1
    LINGUISTIC_TYPE_REF="Blank"
    <LINGUISTIC_TYPE CONSTRAINTS="Symbolic_Association" GRAPHIC_REFERENCES="false" LINGUISTIC_TYPE_ID="Blank" TIME_ALIGNABLE="false"/>
    <TIER LINGUISTIC_TYPE_REF="Blank" PARENT_REF="utterance" PARTICIPANT="DDD" TIER_ID="utterance_translation">    
    """
    print("Copying utterance translation tier from file 1")
    utterance_translation_tier_params = {
        'LINGUISTIC_TYPE_REF': 'Blank',
        'PARENT_REF': utterance_target_tier,
        'TIER_ID': utterance_translation_target_tier
    }
    _ref_tier_copy(source_eaf=eaf_1,
                   target_eaf=eaf_3,
                   source_tier_name=utterance_translation_source_tier,
                   target_tier_name=utterance_translation_target_tier,
                   target_parent_tier_name=utterance_target_tier,
                   override_params=utterance_translation_tier_params)
    """
    Copy the word tier from file 2
        <LINGUISTIC_TYPE CONSTRAINTS="Symbolic_Subdivision" GRAPHIC_REFERENCES="false" LINGUISTIC_TYPE_ID="word" TIME_ALIGNABLE="false"/>
        <TIER DEFAULT_LOCALE="qaa-fonipa-x-eib" LINGUISTIC_TYPE_REF="word" PARENT_REF="A_phrase-segnum-en" PARTICIPANT="DDD" TIER_ID="A_word-txt-qaa-fonipa-x-eib">

    """
    print("Copying word tier from file 2")
    word_type_params = {
        'LINGUISTIC_TYPE_ID': 'word',
        'CONSTRAINTS': 'Symbolic_Subdivision',
        'TIME_ALIGNABLE': 'false'
    }
    eaf_3.add_linguistic_type('word', param_dict=word_type_params)

    word_tier_params = {
        'LINGUISTIC_TYPE_REF': 'word',
        'PARENT_REF': utterance_target_tier,
        'TIER_ID': word_target_tier
    }

    _copy_symbolic_subdivision_tier(
        source_eaf=eaf_2,
        target_eaf=eaf_3,
        source_tier_name=word_source_tier,
        target_tier_name=word_target_tier,
        target_parent_tier_name=utterance_id_target_tier,
        override_params=word_tier_params)
    """
    Get all the annotations from -2 gloss tier (gloss_source_tier A_morph-gls-en)
    Join the glosses with "-" so there is a 1:1 match with word annotations
    <LINGUISTIC_TYPE CONSTRAINTS="Symbolic_Association" GRAPHIC_REFERENCES="false" LINGUISTIC_TYPE_ID="Blank" TIME_ALIGNABLE="false"/>
    <TIER LINGUISTIC_TYPE_REF="Blank" PARENT_REF="grammatical_words" TIER_ID="gloss">
    """
    print("Epic battle with words to get glosses from file 2")
    gloss_tier_params = {
        'LINGUISTIC_TYPE_REF': 'Blank',
        'PARENT_REF': word_target_tier,
        'TIER_ID': gloss_target_tier
    }
    # None of the pympi methods will suit this task, so let's do it manually.
    # Get all the data
    eaf_2_tiers = eaf_2.tiers
    eaf_2_timeslots = eaf_2.timeslots
    # A tier is of the form: {tier_name -> (aligned_annotations, reference_annotations, attributes, ordinal)},
    # Word and gloss tiers are ref_annotations, the second item in the tiers dict. See docs for more info about format.
    word_tier = eaf_2_tiers[word_source_tier][1]
    morph_tier = eaf_2_tiers[morph_source_tier][1]
    gloss_tier = eaf_2_tiers[gloss_source_tier][1]

    # Each reference annotation is of the form: [{id -> (reference, value, previous, svg_ref)}].
    # Start at the top of the hierarchy
    # (utterances are aligned annotations, hence index [0])
    utterance_id_tier = eaf_2_tiers[utterance_id_source_tier][0]

    new_dict = dict()
    # For each utterance, get the words. For each word, get the glosses. Merge glosses for each word
    for utterance_id, utterance in utterance_id_tier.items():
        # utterance is (begin_ts, end_ts, value, svg_ref); resolve the
        # timeslot ids to actual times
        utt_start = eaf_2_timeslots[utterance[0]]
        utt_end = eaf_2_timeslots[utterance[1]]
        word_gloss: List[Union[int, List[str]]] = []
        for word_id, word in word_tier.items():
            # word[0] is the parent reference; keep words of this utterance
            if word[0] == utterance_id:
                glosses = []
                # Find morphs of this word...
                for morph_id, morph in morph_tier.items():
                    # ...by filtering on morph parents id matching the word id
                    if morph[0] == word_id:
                        for gloss_id, gloss in gloss_tier.items():
                            if gloss[0] == morph_id:
                                glosses.append(gloss[1])
                # Join glosses for this word with a dash
                word_gloss.append([word[1], '-'.join(glosses)])
        # Now, work out word duration (it is an even division of parent utterance duration)
        # Make this value the first item in the data list eg [word_duration, [word, gloss], [word, gloss], ...]
        # NOTE(review): raises ZeroDivisionError if an utterance has no
        # words — confirm the inputs guarantee at least one word.
        num_segments = len(word_gloss)
        utt_dur = utt_end - utt_start
        word_dur = int(utt_dur / num_segments)
        word_gloss = [utt_start, word_dur] + word_gloss
        print("word gloss", word_gloss)
        new_dict[utterance_id] = word_gloss

    # Having worked all that out, now we can add a ref annotation tier.
    # but parent seems to now bubble all the way to the top.
    eaf_3.add_tier(gloss_target_tier,
                   ling='Blank',
                   parent=word_target_tier,
                   tier_dict=gloss_tier_params)
    # And some annotations
    for ann_id, annotation in new_dict.items():
        # annotation is [utt_start, word_dur, [word, gloss], ...]
        utt_start = annotation[0]
        word_dur = annotation[1]
        count = 0

        for ann in annotation[2:]:
            # NOTE(review): word_start is computed but never used below —
            # ref annotations take their timing from the parent tier.
            word_start = utt_start + word_dur * count
            id_tier = gloss_target_tier
            tier2 = word_target_tier
            value = ann[1]
            prev = None
            svg = None

            # Find the word annotation in eaf_3 whose value matches this
            # word, and hang the gloss off it as a reference annotation.
            for aid, (ref_id, _value, _prev,
                      _) in eaf_3.tiers[tier2][1].items():
                if ann[0] == _value:
                    new_aid = eaf_3.generate_annotation_id()
                    eaf_3.tiers[id_tier][1][new_aid] = (aid, value, prev, svg)

            count = count + 1

    # Save the new file
    print("Saving object to file")
    eaf_3.to_file(file_3)