def evaluate_single_file(manual_folder, automatic_folder, results_folder,
                         file_name):
    """Compare automatically detected silences with a manual annotation.

    Loads ``<manual_folder>/<file_name>`` and
    ``<automatic_folder>/automatic_<file_name>``, reads from both grids the
    tier named after the file (``file_name`` without ``.TextGrid``), walks
    the automatic intervals labelled ``CONSTANT_SIL`` alongside the manual
    intervals, and writes a text report to
    ``<results_folder>/<base_name>.results``.

    Counters:
        silences_out: automatic silence whose current manual interval has
            blank text.
        silence_only_in_automatic: automatic silence whose current manual
            interval has non-blank text and does not end inside the silence.
        silence_in_both: automatic silence that strictly contains the end of
            the current manual interval.
        total_automatic_silences: every automatic interval labelled
            ``CONSTANT_SIL``.
        total_spoken_segments: manual intervals with non-blank text reached
            while advancing past a silence.

    Returns:
        Tuple ``(silences_out, silence_only_in_automatic, silence_in_both,
        total_automatic_silences, total_spoken_segments)``.

    Raises:
        StopIteration: ``next(manual_iterator)`` is called without a default,
            so running out of manual intervals (or an empty manual tier)
            propagates out of this function.
    """
    LOGGER.debug(f"Processing {file_name}")
    manual_file = textgrids.TextGrid(os.path.join(manual_folder, file_name))
    automatic_file = textgrids.TextGrid(
        os.path.join(automatic_folder, f"automatic_{file_name}"))
    base_name = file_name.replace(".TextGrid", "")
    manual_intervals = manual_file[base_name]
    automatic_intervals = automatic_file[base_name]

    silences_out = 0
    silence_only_in_automatic = 0
    silence_in_both = 0
    total_automatic_silences = 0
    total_spoken_segments = 0
    manual_iterator = iter(manual_intervals)
    current_manual_interval = next(manual_iterator)
    for automatic_interval in automatic_intervals:
        # Unlabelled automatic intervals carry no decision; ignore them.
        if not automatic_interval.text:
            continue

        if automatic_interval.text.strip() == CONSTANT_SIL:
            # Catch up when the manual cursor ended before this silence starts.
            if automatic_interval.xmin > current_manual_interval.xmax:
                current_manual_interval = next(manual_iterator)

            total_automatic_silences += 1
            # The manual boundary falls strictly inside the automatic silence.
            if automatic_interval.xmin < current_manual_interval.xmax < automatic_interval.xmax:
                silence_in_both += 1
                current_manual_interval = next(manual_iterator)
                if current_manual_interval.text.strip():
                    total_spoken_segments += 1
            else:
                if not current_manual_interval.text.strip():
                    silences_out += 1
                else:
                    silence_only_in_automatic += 1
            # Advance once more when the manual interval still ends before
            # the end of this silence.
            if current_manual_interval.xmax < automatic_interval.xmax:
                current_manual_interval = next(manual_iterator)
                if current_manual_interval.text.strip():
                    total_spoken_segments += 1
    results = f"""Results
silences_out = {silences_out}
silence_only_in_automatic = {silence_only_in_automatic}  # False Positives
silence_in_both = {silence_in_both}  # True Positives
excluded_silences = {total_spoken_segments - silence_in_both}  # False negatives
total_automatic_silences = {total_automatic_silences}
total_spoken_segments = {total_spoken_segments}
(silence_in_both / total_spoken_segments) * 100 = {(silence_in_both / (total_spoken_segments or 1)) * 100 }
"""
    # The context manager closes the report even when the write fails.
    with open(os.path.join(results_folder, f"{base_name}.results"),
              "w+") as results_file:
        results_file.write(results)

    return silences_out, silence_only_in_automatic, silence_in_both, total_automatic_silences, total_spoken_segments
Exemplo n.º 2
0
def create_text_files(wav_folder, transcription_folder, output_folder):
    """Crop every recording in ``wav_folder`` to its annotated span.

    For each directory entry, the matching ``<name>.TextGrid`` in
    ``transcription_folder`` is read and the tier named ``<name>`` is
    scanned. The crop starts at the ``xmin`` of the last interval labelled
    ``"1"`` seen before the scan stops, and ends at the ``xmin`` of the first
    empty interval that follows a start marker (or at the end of the
    recording when there is none). The result is written to
    ``<output_folder>/<name>.wav``.

    Files for which no ``"1"`` interval exists are skipped with an error log;
    a missing input or output path is logged and the loop moves on.
    """
    for wav_file in os.listdir(wav_folder):
        try:
            file_name = wav_file.replace(".wav", "")
            LOGGER.info(f"Processing {file_name}")
            frequency, signal = wavfile.read(
                os.path.join(wav_folder, f"{file_name}.wav"))
            text_grid = textgrids.TextGrid(
                os.path.join(transcription_folder, f"{file_name}.TextGrid"))
            intervals = text_grid[file_name]
            initial_second = -1
            # Samples run along axis 0 (the axis sliced below); shape[-1]
            # would be the channel count for multi-channel audio.
            end_second = signal.shape[0] / frequency
            for interval in intervals:
                if interval.text == "1":
                    initial_second = interval.xmin
                if interval.text == "" and initial_second > -1:
                    end_second = interval.xmin
                    break
            if initial_second < 0:
                # Slicing from a negative start would silently keep only the
                # tail of the recording, so skip the file instead.
                LOGGER.error(f"No interval labelled '1' found in {file_name}")
                continue
            cropped_signal = signal[int(initial_second *
                                        frequency):int(end_second * frequency)]
            output_path = os.path.join(output_folder, f"{file_name}.wav")
            LOGGER.info(f"Saving cropped file in {output_path}")
            wavfile.write(output_path, frequency, cropped_signal)

        except FileNotFoundError:
            LOGGER.error(f"File not found {file_name}")
def complete_missing_numbers_for_single_annotation(text_folder, output_folder,
                                                   text_file):
    """Fill in the running index of every interval after the first numbered one.

    Reads ``<text_folder>/<text_file>`` and walks the tier named after the
    file (``text_file`` without ``.TextGrid``). An interval whose text parses
    as an integer resets the running index to that value; any other interval
    advances the running index by one. From the first numbered interval
    onwards, each interval's text is overwritten with the running index. The
    grid is then written to ``<output_folder>/<text_file>``.

    A manually annotated number that is not exactly one more than the running
    index is reported through ``LOGGER.error``; the annotated number still
    wins.
    """
    current_annotation = textgrids.TextGrid(
        os.path.join(text_folder, text_file))
    transcription_name = text_file.replace(".TextGrid", "")
    intervals = current_annotation[transcription_name]
    LOGGER.info(f"Analyzing {text_file}")
    LOGGER.info("-------------------")
    # None until the first numbered interval has been seen.
    current_index = None
    for interval in intervals:
        try:
            annotated_index = int(interval.text)
        except ValueError:
            LOGGER.debug(f"Value error with {interval.text}")
            if current_index is not None:
                LOGGER.debug(
                    f"Current index {current_index} {type(current_index)}")
                current_index += 1
        else:
            LOGGER.info(
                f"Manually annotated_index found: {annotated_index}")
            # Compare against None explicitly: a running index of 0 is a
            # valid value and must still be checked for consistency.
            if (current_index is not None
                    and annotated_index != current_index + 1):
                LOGGER.error(f"Consistency problem at {annotated_index}")
            current_index = annotated_index
        if current_index is not None:
            LOGGER.debug(f"Writing {current_index}")
            interval.text = textgrids.Transcript(str(current_index))

    current_annotation.write(os.path.join(output_folder, text_file))
Exemplo n.º 4
0
def split_and_name_textgrids(
        original_folder="/original_en_diapix_data_changed_textgrids",
        destination_folder='/split_wav_files_folder/'):
    """Split each speaker TextGrid into two per-channel files.

    Every path returned by ``get_textgrids_for_each_speaker`` is loaded,
    passed through ``combine_dfs`` to obtain one object per channel, and each
    object is saved with ``to_csv`` under the current working directory in
    ``destination_folder``, using the names from ``get_file_names`` plus a
    ``.TextGrid`` suffix.
    """
    working_dir = os.getcwd()
    for grid_path in get_textgrids_for_each_speaker(folder=original_folder):
        first_name, second_name = get_file_names(grid_path)
        first_channel, second_channel = combine_dfs(
            textgrids.TextGrid(grid_path))
        # Plain concatenation on purpose: destination_folder may start with
        # a slash, which os.path.join would treat as an absolute path.
        out_prefix = working_dir + '/' + destination_folder + '/'
        first_channel.to_csv(out_prefix + first_name + ".TextGrid")
        second_channel.to_csv(out_prefix + second_name + ".TextGrid")
Exemplo n.º 5
0
def save_grid(infile):
    """Return the ``MAU`` tier as ``[text, (start, end)]`` entries.

    When the first interval is labelled ``'<p:>'`` its end time is subtracted
    from every start and end time; otherwise times are left unshifted. A
    ``TypeError`` while reading is reported on stdout and whatever was
    collected up to that point is returned.
    """
    timed_phonemes = []
    try:
        mau_tier = textgrids.TextGrid(infile)['MAU']
        leading = mau_tier[0]
        shift = float(leading.xmax) if leading.text == '<p:>' else 0
        for segment in mau_tier:
            span = (segment.xmin - shift, segment.xmax - shift)
            timed_phonemes.append([segment.text, span])
    except TypeError:
        print('File ' + infile + ' not in proper TextGrid format')

    return timed_phonemes
Exemplo n.º 6
0
def extract_label_timesteps_from_file(
        path: str, labels: Tuple[str, ...]) -> labels_timesteps_data_type:
    """Collect the time spans of the requested labels from the ``ORT`` tier.

    Args:
        path: Path of the TextGrid file to read.
        labels: Labels to look for; each one becomes a key of the result.

    Returns:
        A dict mapping every entry of ``labels`` to a list of
        ``(xmin, xmax)`` tuples, one per ``ORT`` interval whose text equals
        that label, in tier order. Labels that never occur map to ``[]``.

    Raises:
        Whatever ``textgrids.TextGrid`` raises when the file cannot be
        loaded. The error is deliberately not swallowed: without a grid there
        is nothing to iterate over.
    """
    grid = textgrids.TextGrid(path)
    result_intervals = {label: [] for label in labels}
    for item in grid['ORT']:
        label = str(item.text)
        if label in result_intervals:
            result_intervals[label].append((item.xmin, item.xmax))

    return result_intervals
Exemplo n.º 7
0
def main(csv_col, path_in, path_out):
    """Back up one TextGrid and rebuild its tiers from a CSV row.

    The grid at ``<path_in>/<csv_col[4]>/<name>.TextGrid`` (``name`` taken
    from ``csv_col[1]`` with ``.wav`` replaced) is first copied to
    ``path_out``. ``pop`` is then called once per tier the grid had, and
    ``add_tiers`` is called for "segment", "target", "gloss", "sentence" and
    "archive id" with the grid's ``xmax`` and the matching CSV column, before
    the grid is written back over the original file.
    """
    textgrid_name = csv_col[1].replace(".wav", ".TextGrid")
    source_path = f'{path_in}/{csv_col[4]}/{textgrid_name}'

    grid = textgrids.TextGrid(source_path)
    grid.write(f'{path_out}/{textgrid_name}')

    grid_end = grid.xmax

    # The count is taken once up front, before pop() touches the grid.
    remaining = len(grid)
    while remaining > 0:
        pop(grid)
        remaining -= 1

    # (tier name, CSV column); None marks the tier created with empty text.
    tier_columns = (
        ("segment", None),
        ("target", 2),
        ("gloss", 3),
        ("sentence", 1),
        ("archive id", 0),
    )
    for tier_name, column in tier_columns:
        tier_text = "" if column is None else csv_col[column]
        add_tiers(tier_name, grid, grid_end, tier_text)

    grid.write(source_path)
def write_text_grid_from_segmentation(segmentation,
                                      text_name,
                                      output_folder,
                                      xmin=0,
                                      xmax=0,
                                      audio_frequency=16000):
    """Write ``automatic_<text_name>.TextGrid`` from sample-based segments.

    ``segmentation`` is iterated as ``(start, end)`` pairs. Both values are
    divided by ``audio_frequency`` to obtain times. For each pair the tier
    named ``text_name`` receives an empty interval from the end of the
    previous pair (0 for the first) to ``start``, followed by a ``"sil"``
    interval from ``start`` to ``end``. ``xmin`` and ``xmax`` become the
    grid's bounds. The number of pairs is printed.
    """
    grid = textgrids.TextGrid()
    grid.xmin = xmin
    grid.xmax = xmax
    silence_tier = textgrids.Tier()
    grid[text_name] = silence_tier
    print(f"tokens: {len(segmentation)}")

    last_end_sample = 0
    for start_sample, end_sample in segmentation:
        gap = textgrids.Interval("", last_end_sample / audio_frequency,
                                 start_sample / audio_frequency)
        silence_tier.append(gap)
        silence = textgrids.Interval("sil", start_sample / audio_frequency,
                                     end_sample / audio_frequency)
        silence_tier.append(silence)
        last_end_sample = end_sample

    out_path = os.path.join(output_folder, f"automatic_{text_name}.TextGrid")
    grid.write(out_path)
Exemplo n.º 9
0
def textgrids2csv(path):
    """Dump the ``clip`` tier of every TextGrid directly inside ``path`` to CSV.

    For each file whose last dot-separated component is ``TextGrid``, a CSV
    named after the part of the file name before its first dot is opened in
    append mode next to it. A ``label,duration,xmin,xmax`` header is written,
    followed by one line per interval. Progress is printed to stdout.

    Note:
        Fields are joined with plain commas, so a label that itself contains
        a comma produces an extra column.
    """
    path += '/'
    root, dirs, all_files = next(os.walk(path))
    # np.flatnonzero always yields a 1-D index array, so a folder holding
    # exactly one TextGrid stays iterable (squeezing np.where output
    # collapses to a 0-d array in that case).
    textgrid_idx = np.flatnonzero(
        [fl.split('.')[-1] == 'TextGrid' for fl in all_files])
    print(textgrid_idx)
    files = [all_files[idx] for idx in textgrid_idx]
    print(files)

    for fl in files:
        grid = textgrids.TextGrid(path + fl)
        clip = grid['clip']
        print(clip)

        opFile = path + '/' + fl.split('.')[0] + '.csv'
        # The context manager closes the CSV even if an interval fails to
        # serialise half-way through.
        with open(opFile, 'a+', encoding='utf-8') as fid:
            heading = 'label,duration,xmin,xmax'
            fid.write(heading + '\n')
            for intval in clip:
                print(intval.text, intval.dur, intval.xmin, intval.xmax)
                values = intval.text + ',' + str(intval.dur) + ',' + str(intval.xmin) + ',' + str(intval.xmax)
                fid.write(values + '\n')
def evaluate_single_file(text_folder, wav_folder, results_folder, file_name):
    """Pair the sentences of a transcript with the gaps between audio segments.

    Reads ``<text_folder>/<file_name>.txt``, splits it with
    ``sent_tokenize`` and drops the first sentence. Segments come from
    ``extract_segments_from_file`` on ``<wav_folder>/<file_name>.wav``.
    Sentence ``i`` is placed on the interval running from the end of segment
    ``i - 1`` (0 for the first) to the start of segment ``i``, with sample
    positions divided by the audio frequency. Pairing stops at the shorter of
    the two lists. The grid is written to
    ``<results_folder>/silence_aligned_<file_name>.TextGrid``.

    Raises:
        IndexError: when no segments are returned, because the grid's
            ``xmax`` is taken from the last segment.
    """
    LOGGER.debug(f"Processing {file_name}")
    # Close the transcript as soon as it has been read.
    with open(os.path.join(text_folder, f"{file_name}.txt"), 'r') as text_file:
        text = " ".join(text_file.readlines())
    tokenized_text = sent_tokenize(text)
    tokenized_text = tokenized_text[1:]
    segments, audio_frequency = extract_segments_from_file(
        os.path.join(wav_folder, f"{file_name}.wav"))
    min_length = min(len(tokenized_text), len(segments))
    tg = textgrids.TextGrid()
    tg.xmin = 0
    tg.xmax = segments[-1][1] / audio_frequency
    tier = textgrids.Tier()
    tg[file_name] = tier
    previous_segment = 0
    for sentence, (xmin, xmax) in zip(tokenized_text[:min_length],
                                      segments[:min_length]):
        tier.append(
            textgrids.Interval(sentence,
                               previous_segment / audio_frequency,
                               xmin / audio_frequency))
        previous_segment = xmax
    tg.write(
        os.path.join(results_folder, f"silence_aligned_{file_name}.TextGrid"))
            except AttributeError:
                print(tree)
                pass

        pw_dict = {}

        outfile = os.path.join(out_dir,
                               'word_times_' + sentence_id + '.pickle')

        textgrid_file = os.path.join(
            path_to_textgrids, sentence_id2speaker[sentence_id],
            sentence_id2speaker[sentence_id] + "-" +
            os.path.splitext(os.path.basename(file))[0].replace("_", "-") +
            ".TextGrid")

        grid = textgrids.TextGrid(textgrid_file)
        index = 0
        # get all alignments (based on recording)
        alignments = []
        for word in grid["words"]:
            label = word.text.transcode()
            if label != "":
                alignments.append(word)

        #alignment_words = [word.text.transcode() for word in alignments]
        def remove(alignments):
            print([(word_from_tg.text.transcode(), word_from_transcription)
                   for word_from_tg, word_from_transcription in zip(
                       alignments, transcription)])
            if all(word_from_tg.text.transcode() == word_from_transcription
                   for word_from_tg, word_from_transcription in zip(
Exemplo n.º 12
0
def main():
    """Synthesise a sentence by concatenating diphones cut from a recording.

    The recording at ``SOUND_PATH_FILE`` and the ``'diphones'`` tier of the
    TextGrid at ``GRID_PATH_FILE`` are loaded. For every phoneme of
    ``ORTHO_SENTENCES[0]`` (as converted by ``convert_ortho_sentence``), the
    tier is scanned for the matching pair of consecutive phonemes. The audio
    between the midpoints of the two phonemes is extracted, its pitch,
    duration and intensity are adjusted, and the result is appended to the
    output sound, which is finally saved to ``RESULT_PATH_FILE`` as WAV.

    Any exception is caught and printed; nothing is re-raised.
    """

    try:

        # create a Sound object from our audio file
        snd = parselmouth.Sound(SOUND_PATH_FILE)

        # get the sampling frequency
        frequency = snd.get_sampling_frequency()

        # get the intensity of the recording
        record_intensity = snd.get_intensity()

        # create a new Sound object that the diphones will be appended to
        new_sound = call("Create Sound from formula", "fichier_synthese", 1, 0,
                         0.05, frequency, "0")

        # open the TextGrid file
        segmentation = textgrids.TextGrid(GRID_PATH_FILE)

        # sentence to be pronounced (by default, the first one)
        sentence = ORTHO_SENTENCES[0]

        # get the list of words of the sentence, the position of the verb,
        # the position of the coordinating conjunction (if present) and the number of words
        phono_sentence, verb_offsets, conj_offset, nbr_words = convert_ortho_sentence(
            sentence)

        # get the coefficient used by the algorithms
        coefficient = get_coefficient(nbr_words)

        # for each word of the sentence
        for num_word in range(len(phono_sentence)):

            # word preceding the current word; starts as '_' because that phoneme stands for the
            # sentence-initial or sentence-final silence in the TextGrid. An empty string cannot be
            # used here because it is indexed below
            word_before = '_'
            # if the word is not the first of the sentence, take the word that precedes it (for liaisons)
            if num_word != 0:
                word_before = phono_sentence[num_word - 1]
            # current word
            word = phono_sentence[num_word]

            # for each letter of the word; i is the index of the letter within word
            i = 0
            while i < len(word):

                middle_last_phon = middle_phon = None

                # previous phoneme
                last_text_phon = ''
                last_phon = None

                # walk through all the intervals
                for j, phon in enumerate(segmentation['diphones']):

                    # if this is the first phoneme of the word
                    if i == 0:

                        # use the last phoneme of the previous word and the first of the current word (liaisons)
                        if last_text_phon == word_before[
                                -1] and phon.text == word[i]:

                            # compute the midpoint of the current phoneme and of the previous one
                            middle_last_phon = last_phon.xmin + (
                                (last_phon.xmax - last_phon.xmin) / 2)
                            middle_phon = phon.xmin + (
                                (phon.xmax - phon.xmin) / 2)
                            # increment i by 1 (move to the next phoneme) and leave the for loop
                            i += 1
                            break

                    else:

                        # if this is the last phoneme of the sentence, add the sentence-final pause diphone
                        # the pause sign is '_' in the TextGrid
                        if num_word == len(phono_sentence) - 1 and i == len(
                                word) - 1:

                            if last_text_phon == word[i] and phon.text == '_':

                                # compute the midpoint of the current phoneme and of the previous one
                                middle_last_phon = last_phon.xmin + (
                                    (last_phon.xmax - last_phon.xmin) / 2)
                                middle_phon = phon.xmin + (
                                    (phon.xmax - phon.xmin) / 2)
                                # increment i by 1 (move to the next phoneme) and leave the for loop
                                i += 1
                                break

                        else:
                            # otherwise, look for the diphone inside the word
                            if last_text_phon == word[
                                    i - 1] and phon.text == word[i]:

                                # compute the midpoint of the current phoneme and of the previous one
                                middle_last_phon = last_phon.xmin + (
                                    (last_phon.xmax - last_phon.xmin) / 2)
                                middle_phon = phon.xmin + (
                                    (phon.xmax - phon.xmin) / 2)
                                # increment i by 1 (move to the next phoneme) and leave the for loop
                                i += 1
                                break

                    # if the diphone cannot be found, move on to the next phoneme of the word
                    # most often this is a liaison that is not accounted for
                    # (this is the only place i advances when nothing matched, and it is
                    # only reached on the last interval of the tier)
                    if j == len(segmentation['diphones']) - 1:
                        print(
                            'AVERTISSEMENT : Un des diphones est introuvable !'
                        )
                        i += 1

                    # move to the next phoneme of the TextGrid:
                    # the current phoneme becomes the previous phoneme
                    last_phon = phon
                    last_text_phon = phon.text

                # if the diphone was found in the TextGrid
                if middle_phon is not None and middle_last_phon is not None:

                    # snap each midpoint to the nearest zero crossing
                    middle_last_phon = snd.get_nearest_zero_crossing(
                        middle_last_phon, 1)
                    middle_phon = snd.get_nearest_zero_crossing(middle_phon, 1)

                    # extract the wanted diphone into the variable extrait
                    extrait = snd.extract_part(
                        middle_last_phon, middle_phon,
                        parselmouth.WindowShape.RECTANGULAR, 1, False)

                    # create a Manipulation object used to change the frequency
                    # and the duration of the extract
                    manipulation = call(extrait, "To Manipulation", 0.001, 75,
                                        600)

                    frequence = get_frequency_with_word(
                        num_word, verb_offsets, conj_offset, coefficient)
                    relative_duration = get_relative_duration_with_word(
                        num_word, verb_offsets, conj_offset, coefficient)
                    intensity = get_intensity(num_word, verb_offsets,
                                              nbr_words, record_intensity)

                    # change the fundamental frequency of the extract
                    extrait = alter_pitch(extrait, manipulation, frequence)
                    # change the duration of the extract
                    extrait = alter_duration(extrait, manipulation,
                                             relative_duration)
                    # change the intensity of the extract
                    extrait.scale_intensity(intensity)

                    # concatenate the resulting diphone onto new_sound
                    new_sound = new_sound.concatenate([new_sound, extrait])

        # save the result to a .wav file
        new_sound.save(RESULT_PATH_FILE, parselmouth.SoundFileFormat.WAV)

    # if an error occurs
    except Exception as error:
        print('Une erreur s\'est produite : {}'.format(error))
Exemplo n.º 13
0
#!/usr/bin/env python3
# created by Mana ASHIDA, 12-09-2020, ver.1.0

import glob
import csv
import textgrids
from tqdm import tqdm

# Ask for a folder, then write one "<file name>,<xmax>" row per TextGrid found
# directly inside it to <folder>/durations.csv.
print("What is the path to the folder containing target TextGrids?")
path_in = input()
paths = glob.glob(path_in+"/*.TextGrid")

# newline="" lets the csv module control line endings itself; the with-block
# closes the file even when one of the TextGrids fails to parse.
with open(path_in+"/durations.csv", "w", newline="") as f:
    writer = csv.writer(f)

    for path in tqdm(sorted(paths)):
        f_in = textgrids.TextGrid(path)
        writer.writerow([path.split("/")[-1], f_in.xmax])

# Report the name of the file that was actually written.
print("durations.csv is generated in "+path_in)
Exemplo n.º 14
0
def _sync_interval(anchor, tier, interval, idx, offset):
    """Shift ``offset`` until ``tier[idx + offset]`` lines up with ``anchor``.

    ``interval`` is the current candidate, ``tier[idx + offset]``. Returns
    the ``(interval, offset)`` pair reached once ``is_close`` accepts the
    candidate or neither loop can move any further.
    """
    # Step forward while the candidate starts before the anchor, without
    # running past the last interval of the tier.
    while (not is_close(anchor, interval) and interval.xmin < anchor.xmin
           and idx + offset < len(tier) - 1):
        offset += 1
        interval = tier[idx + offset]
    # Step back while the candidate starts after the anchor.
    # NOTE(review): there is no lower bound here, so idx + offset can become
    # negative and index from the end of the tier - confirm this is intended.
    while not is_close(anchor, interval) and interval.xmin > anchor.xmin:
        offset -= 1
        interval = tier[idx + offset]
    return interval, offset


def transform_textgrids(oArgs, errHandle):
    """Transform the textgrids in the input directory into one Excel sheet.

    Every file in ``oArgs['input']`` whose name contains ``.TextGrid`` is
    read. Its 1st, 2nd, 5th and 6th tiers are walked in parallel, driven by
    the intervals of the 1st tier. For each position where all four tiers
    line up (per ``is_close``), one row is written: child name (file name
    without extension), the text of tiers 1, 2 and 5, and the text of tier 6
    split on ``/`` into a letter part and a number part. Positions that do
    not line up are reported through ``errHandle.Status``. The workbook is
    saved as ``jasmintg.xlsx`` in ``oArgs['output']``.

    Args:
        oArgs: Mapping with the keys ``input``, ``output``, ``force`` and
            ``debug``.
        errHandle: Object providing ``Status(msg)`` and ``DoError(name)``.

    Returns:
        True on success. False when any exception occurred, after it has been
        reported with ``errHandle.DoError``; nothing is saved in that case.
    """

    # Initialisations
    src_ext = ".TextGrid"
    outfile_name = "jasmintg.xlsx"
    headers = ['child', 'tier1', 'tier2', 'tier5', 'tier6_L', 'tier6_N']

    try:
        dirInput = oArgs['input']
        dirOutput = oArgs['output']
        # Not used below; still read so that a missing key is reported the
        # same way as before.
        force = oArgs['force']
        debug = oArgs['debug']

        # Determine the output file
        outfile = os.path.join(dirOutput, outfile_name)

        # Start a workbook
        wb = openpyxl.Workbook()
        ws = wb.active
        ws.title = "Data"

        # Set up the column headers
        for col_num, header in enumerate(headers, start=1):
            c = ws.cell(row=1, column=col_num)
            c.value = header
            c.font = openpyxl.styles.Font(bold=True)
            # Set width to a fixed size
            ws.column_dimensions[get_column_letter(col_num)].width = 8.0

        # Walk all the files in the input
        lst_src = [
            os.path.join(dirInput, f) for f in os.listdir(dirInput)
            if os.path.isfile(os.path.join(dirInput, f)) and src_ext in f
        ]

        row_num = 1
        for file in lst_src:
            row_num += 1
            # Get the name of the child from the name of the file
            child = os.path.basename(file).replace(src_ext, "")

            # Show where we are
            errHandle.Status("child = {}".format(child))

            # Read the textgrid file
            grid = textgrids.TextGrid(file)

            # Access the tiers that we need. Reset them for every file so a
            # grid with too few tiers fails instead of silently reusing the
            # tiers of the previous file.
            tier1 = tier2 = tier5 = tier6 = None
            for counter, tier in enumerate(grid.values(), start=1):
                if counter == 1:
                    tier1 = tier
                elif counter == 2:
                    tier2 = tier
                elif counter == 5:
                    tier5 = tier
                elif counter == 6:
                    tier6 = tier

            offset2 = 0
            offset5 = 0
            offset6 = 0

            # Walk through all the items in tier1
            for idx, t1 in enumerate(tier1):

                # Get the corresponding values in the other tiers
                t2 = tier2[idx + offset2]
                t5 = tier5[idx + offset5]
                t6 = tier6[idx + offset6]

                # Re-align each tier with tier1; the offsets persist across
                # iterations.
                t2, offset2 = _sync_interval(t1, tier2, t2, idx, offset2)
                t5, offset5 = _sync_interval(t1, tier5, t5, idx, offset5)
                t6, offset6 = _sync_interval(t1, tier6, t6, idx, offset6)

                # Check if all tiers synchronize
                if is_close(t1, t2) and is_close(t1, t5) and is_close(t1, t6):
                    # All is well: process
                    arCombi = t6.text.split("/")
                    letter = ""
                    number = ""
                    if len(arCombi) == 1:
                        # A single part is either all digits or a letter code
                        single = arCombi[0]
                        if re.match(r"^\d+$", single):
                            number = single
                        else:
                            letter = single
                    elif len(arCombi) == 2:
                        letter = arCombi[0].strip()
                        number = arCombi[1].strip()

                    # Create list of values
                    row = [child, t1.text, t2.text, t5.text, letter, number]

                    for col, value in enumerate(row, start=1):
                        cell_this = ws.cell(row=row_num, column=col)
                        cell_this.value = value
                        cell_this.alignment = openpyxl.styles.Alignment(
                            wrap_text=False)

                    # We are going to the next row
                    row_num += 1

                else:
                    # Synchronization problem
                    msg = "Synchronization problem in [{}] tier {} t1={} t2={} t5={} t6={}".format(
                        child, idx, t1.xmin, t2.xmin, t5.xmin, t6.xmin)
                    errHandle.Status(msg)

        # Save the result
        wb.save(outfile)

        return True
    except Exception:
        # KeyboardInterrupt and SystemExit are deliberately not caught here.
        errHandle.DoError("transform_textgrids")
        return False
Exemplo n.º 15
0
import textgrids

# ----------------------------------------------------------
# Input TextGrid from CLII
# Expect two command-line arguments: the TextGrid file and the tier to dump.
if len(sys.argv) < 3:
    print("Usage:", sys.argv[0], '<filename> <tiername>')
    sys.exit()
fname = sys.argv[1]
tname = sys.argv[2]

if not os.path.isfile(fname):
    print("File", fname, "does not exist.")
    sys.exit()

try:
    grid = textgrids.TextGrid(fname)
    print("Reading the Textgrid file..." + fname)
except (textgrids.ParseError, textgrids.BinaryError):
    print("Not a recognised file format!", file=sys.stderr)
    # Without a parsed grid the loop below cannot run; stop instead of
    # failing later with a NameError.
    sys.exit(1)

# Strip only the final extension, so dots elsewhere in the path (for example
# a leading "./") do not truncate the name.
fname = os.path.splitext(sys.argv[1])[0]
# grid = grid.write(fname+'.TextGrid', fmt=TEXT_LONG).TextGrid()

# Build one tab-separated line per interval of the requested tier:
# label, xmin, xmax, duration (4 decimals each) and the file name.
tier = ''
for syll in grid[tname]:
    label = syll.text.transcode()
    smin = '{:.4f}'.format(syll.xmin)
    smax = '{:.4f}'.format(syll.xmax)
    sdur = '{:.4f}'.format(syll.dur)
    interval = label+'\t'+smin+'\t'+smax+'\t'+sdur+'\t'+fname+'\n'
    tier += interval