def evaluate_single_file(manual_folder, automatic_folder, results_folder,
                         file_name):
    """Compare automatically detected silences with a manual annotation.

    Reads ``<manual_folder>/<file_name>`` and
    ``<automatic_folder>/automatic_<file_name>``, walks the automatic tier
    while advancing through the manual tier, and writes a summary to
    ``<results_folder>/<base_name>.results``.

    Args:
        manual_folder: Folder holding the manually annotated TextGrid.
        automatic_folder: Folder holding the ``automatic_``-prefixed TextGrid.
        results_folder: Folder where the ``.results`` report is written.
        file_name: TextGrid file name; the tier read from both files is the
            file name without the ``.TextGrid`` suffix.

    Returns:
        Tuple ``(silences_out, silence_only_in_automatic, silence_in_both,
        total_automatic_silences, total_spoken_segments)``.
    """
    LOGGER.debug(f"Processing {file_name}")
    manual_file = textgrids.TextGrid(os.path.join(manual_folder, file_name))
    automatic_file = textgrids.TextGrid(
        os.path.join(automatic_folder, f"automatic_{file_name}"))
    base_name = file_name.replace(".TextGrid", "")
    manual_intervals = manual_file[base_name]
    automatic_intervals = automatic_file[base_name]

    silences_out = 0
    silence_only_in_automatic = 0
    silence_in_both = 0
    total_automatic_silences = 0
    total_spoken_segments = 0

    manual_iterator = iter(manual_intervals)
    # The manual tier can run out before the automatic one. Previously the
    # resulting StopIteration escaped the function and discarded every count;
    # now the comparison simply stops and the counts gathered so far are kept.
    try:
        current_manual_interval = next(manual_iterator)
        for automatic_interval in automatic_intervals:
            if not automatic_interval.text:
                continue
            if automatic_interval.text.strip() == CONSTANT_SIL:
                if automatic_interval.xmin > current_manual_interval.xmax:
                    current_manual_interval = next(manual_iterator)
                total_automatic_silences += 1
                if (automatic_interval.xmin < current_manual_interval.xmax <
                        automatic_interval.xmax):
                    # The manual boundary falls inside the automatic silence.
                    silence_in_both += 1
                    current_manual_interval = next(manual_iterator)
                    if current_manual_interval.text.strip():
                        total_spoken_segments += 1
                else:
                    if not current_manual_interval.text.strip():
                        silences_out += 1
                    else:
                        silence_only_in_automatic += 1
                    if current_manual_interval.xmax < automatic_interval.xmax:
                        current_manual_interval = next(manual_iterator)
                        if current_manual_interval.text.strip():
                            total_spoken_segments += 1
    except StopIteration:
        LOGGER.debug(
            f"Manual annotation exhausted while processing {file_name}")

    results = f"""Results
    silences_out = {silences_out}
    silence_only_in_automatic = {silence_only_in_automatic} # False Positives
    silence_in_both = {silence_in_both} # True Positives
    excluded_silences = {total_spoken_segments - silence_in_both} # False negatives
    total_automatic_silences = {total_automatic_silences}
    total_spoken_segments = {total_spoken_segments}
    (silence_in_both / total_spoken_segments) * 100 = {(silence_in_both / (total_spoken_segments or 1)) * 100 }
    """
    # Context manager guarantees the report is flushed and closed even if the
    # write fails.
    with open(os.path.join(results_folder, f"{base_name}.results"),
              "w+") as results_file:
        results_file.write(results)
    return (silences_out, silence_only_in_automatic, silence_in_both,
            total_automatic_silences, total_spoken_segments)
def create_text_files(wav_folder, transcription_folder, output_folder):
    """Crop every WAV in ``wav_folder`` to its annotated region.

    For each entry of ``wav_folder`` the matching
    ``<transcription_folder>/<name>.TextGrid`` is read. The crop starts at the
    ``xmin`` of the interval labelled ``"1"`` and ends at the ``xmin`` of the
    first empty interval that follows it (or at the end of the audio). The
    cropped audio is written to ``<output_folder>/<name>.wav``.

    Args:
        wav_folder: Folder containing the source WAV files.
        transcription_folder: Folder containing the TextGrid annotations.
        output_folder: Folder receiving the cropped WAV files.

    Missing files are logged and skipped; files without a ``"1"`` marker are
    logged and skipped as well.
    """
    for wav_file in os.listdir(wav_folder):
        try:
            file_name = wav_file.replace(".wav", "")
            LOGGER.info(f"Processing {file_name}")
            frequency, signal = wavfile.read(
                os.path.join(wav_folder, f"{file_name}.wav"))
            text_grid = textgrids.TextGrid(
                os.path.join(transcription_folder, f"{file_name}.TextGrid"))
            intervals = text_grid[file_name]

            initial_second = -1
            # Samples run along axis 0; shape[-1] would be the channel count
            # for multi-channel audio.
            end_second = signal.shape[0] / frequency
            for interval in intervals:
                if interval.text == "1":
                    initial_second = interval.xmin
                if interval.text == "" and initial_second > -1:
                    end_second = interval.xmin
                    break

            if initial_second < 0:
                # Without a start marker the old code sliced from a negative
                # index and silently kept only the tail of the recording.
                LOGGER.warning(
                    f"No start marker found in {file_name}, skipping")
                continue

            cropped_signal = signal[int(initial_second *
                                        frequency):int(end_second * frequency)]
            output_path = os.path.join(output_folder, f"{file_name}.wav")
            LOGGER.info(f"Saving cropped file in {output_path}")
            wavfile.write(output_path, frequency, cropped_signal)
        except FileNotFoundError:
            LOGGER.error(f"File not found {file_name}")
def complete_missing_numbers_for_single_annotation(text_folder, output_folder,
                                                   text_file):
    """Fill non-numeric intervals of an annotation with consecutive indices.

    Reads ``<text_folder>/<text_file>`` and walks the tier named after the
    file (without ``.TextGrid``). Intervals whose text parses as an integer
    are taken as manually annotated indices. Every other interval that comes
    after the first such index gets the next consecutive number as its text.
    The result is written to ``<output_folder>/<text_file>``.

    Args:
        text_folder: Folder containing the source TextGrid.
        output_folder: Folder where the completed TextGrid is written.
        text_file: TextGrid file name.
    """
    current_annotation = textgrids.TextGrid(
        os.path.join(text_folder, text_file))
    transcription_name = text_file.replace(".TextGrid", "")
    intervals = current_annotation[transcription_name]
    LOGGER.info(f"Analyzing {text_file}")
    LOGGER.info(f"-------------------")

    current_index = None
    last_manually_annotated_index = None
    first_index_found = False
    for interval in intervals:
        try:
            last_manually_annotated_index = int(interval.text)
        except ValueError:
            LOGGER.debug(f"Value error with {interval.text}")
            if isinstance(current_index, int):
                LOGGER.debug(
                    f"Current index {current_index} {type(current_index)}")
                current_index = current_index + 1
            if first_index_found:
                LOGGER.debug(f"Writing {current_index}")
                interval.text = textgrids.Transcript(str(current_index))
        else:
            LOGGER.info(
                f"Manually annotated_index found: {last_manually_annotated_index}"
            )
            # Compare against None explicitly: an index of 0 is falsy and
            # used to bypass the consistency check.
            if (current_index is not None
                    and last_manually_annotated_index != current_index + 1):
                LOGGER.error(
                    f"Consistency problem at {last_manually_annotated_index}")
            current_index = last_manually_annotated_index
            first_index_found = True
    current_annotation.write(os.path.join(output_folder, text_file))
def split_and_name_textgrids(
        original_folder="/original_en_diapix_data_changed_textgrids",
        destination_folder='/split_wav_files_folder/'):
    """Split each speaker TextGrid into two per-channel files.

    For every path returned by ``get_textgrids_for_each_speaker`` the grid is
    loaded, split in two by ``combine_dfs`` and each half is saved with
    ``to_csv`` under the current working directory, using the names returned
    by ``get_file_names``.

    Args:
        original_folder: Folder passed to ``get_textgrids_for_each_speaker``.
        destination_folder: Folder (relative to the working directory) that
            receives the output files.

    NOTE(review): the outputs are written with ``to_csv`` but carry a
    ``.TextGrid`` extension; confirm this is intended.
    """
    base_dir = os.getcwd()
    for source_path in get_textgrids_for_each_speaker(folder=original_folder):
        first_name, second_name = get_file_names(source_path)
        first_half, second_half = combine_dfs(textgrids.TextGrid(source_path))
        # Save both channels, first then second.
        for frame, out_name in ((first_half, first_name),
                                (second_half, second_name)):
            frame.to_csv('/'.join(
                (base_dir, destination_folder, out_name + ".TextGrid")))
def save_grid(infile):
    """Return the phonemes of the 'MAU' tier with their timings.

    Each entry is ``[text, (xmin, xmax)]``. When the first interval is the
    ``'<p:>'`` label, its ``xmax`` is subtracted from every timing. If a
    ``TypeError`` occurs a message is printed and whatever was collected so
    far is returned.
    """
    phonemes = []
    try:
        mau_tier = textgrids.TextGrid(infile)['MAU']
        leading = mau_tier[0]
        shift = float(leading.xmax) if leading.text == '<p:>' else 0
        # extend() consumes the generator item by item, so entries gathered
        # before a failure are kept.
        phonemes.extend([seg.text, (seg.xmin - shift, seg.xmax - shift)]
                        for seg in mau_tier)
    except TypeError:
        print('File ' + infile + ' not in proper TextGrid format')
    return phonemes
def extract_label_timesteps_from_file(
        path: str, labels: Tuple[str, ...]) -> labels_timesteps_data_type:
    """Collect the time spans of the requested labels from the 'ORT' tier.

    Args:
        path: Path of the TextGrid file to read.
        labels: Labels to look for; every one of them gets an entry in the
            result, even when it never occurs.

    Returns:
        Dict mapping each label to a list of ``(xmin, xmax)`` tuples, in tier
        order.

    Raises:
        Whatever ``textgrids.TextGrid`` raises when the file cannot be loaded.
        The previous code swallowed that error and then failed with an
        unrelated UnboundLocalError on ``grid``.
    """
    grid = textgrids.TextGrid(path)
    result_intervals = {label: [] for label in labels}
    for item in grid['ORT']:
        label = str(item.text)
        if label in result_intervals:
            result_intervals[label].append((item.xmin, item.xmax))
    return result_intervals
def main(csv_col, path_in, path_out):
    """Back up a TextGrid and rebuild its tiers from one CSV row.

    The TextGrid at ``<path_in>/<csv_col[4]>/<name>`` (``name`` being
    ``csv_col[1]`` with ``.wav`` replaced by ``.TextGrid``) is first copied to
    ``<path_out>/<name>``. All of its tiers are then removed with ``pop`` and
    five new tiers are added with ``add_tiers`` before the file is written
    back in place.

    Args:
        csv_col: Row fields; indices 0-4 are read (archive id, sentence/wav
            name, target, gloss, subfolder).
        path_in: Root folder of the TextGrids to rewrite.
        path_out: Folder receiving the untouched backup copy.
    """
    filename = csv_col[1].replace(".wav", ".TextGrid")
    filepath = f'{path_in}/{csv_col[4]}/{filename}'

    grid = textgrids.TextGrid(filepath)
    grid.write(f'{path_out}/{filename}')
    grid_end = grid.xmax

    # Drop every existing tier; the count is taken once, before removal.
    remaining = len(grid)
    while remaining > 0:
        pop(grid)
        remaining -= 1

    new_tiers = (
        ("segment", ""),
        ("target", csv_col[2]),
        ("gloss", csv_col[3]),
        ("sentence", csv_col[1]),
        ("archive id", csv_col[0]),
    )
    for tier_name, tier_text in new_tiers:
        add_tiers(tier_name, grid, grid_end, tier_text)

    grid.write(filepath)
def write_text_grid_from_segmentation(segmentation,
                                      text_name,
                                      output_folder,
                                      xmin=0,
                                      xmax=0,
                                      audio_frequency=16000):
    """Write a TextGrid that marks the given segments as silences.

    Each ``(start, end)`` pair in ``segmentation`` is divided by
    ``audio_frequency`` and produces two intervals: an empty one from the end
    of the previous segment to ``start``, then a ``"sil"`` one from ``start``
    to ``end``. The grid is saved as
    ``<output_folder>/automatic_<text_name>.TextGrid``.

    Args:
        segmentation: Iterable of ``(start, end)`` pairs.
        text_name: Name of the tier and of the output file.
        output_folder: Destination folder.
        xmin: ``xmin`` stored on the TextGrid.
        xmax: ``xmax`` stored on the TextGrid.
        audio_frequency: Divisor applied to the segment boundaries.
    """
    grid = textgrids.TextGrid()
    grid.xmin = xmin
    grid.xmax = xmax
    silence_tier = textgrids.Tier()
    grid[text_name] = silence_tier

    print(f"tokens: {len(segmentation)}")
    gap_start = 0
    for seg_start, seg_end in segmentation:
        gap = textgrids.Interval("", gap_start / audio_frequency,
                                 seg_start / audio_frequency)
        silence = textgrids.Interval("sil", seg_start / audio_frequency,
                                     seg_end / audio_frequency)
        silence_tier.append(gap)
        silence_tier.append(silence)
        gap_start = seg_end

    target = os.path.join(output_folder, f"automatic_{text_name}.TextGrid")
    grid.write(target)
def textgrids2csv(path):
    """Dump the 'clip' tier of every TextGrid directly inside ``path`` to CSV.

    For each ``*.TextGrid`` file found at the top level of ``path``, a CSV
    named after the part of the file name before its first dot is opened in
    append mode. A ``label,duration,xmin,xmax`` heading and one row per
    interval are written to it. Progress is echoed with ``print``.

    Args:
        path: Folder to scan (sub-folders are not visited).
    """
    path += '/'
    root, dirs, all_files = next(os.walk(path))
    # flatnonzero always yields a 1-D index array. squeeze(where(...)) became
    # a 0-d, non-iterable array when exactly one file matched.
    textgrid_idx = np.flatnonzero(
        [fl.split('.')[-1] == 'TextGrid' for fl in all_files])
    print(textgrid_idx)
    files = [all_files[idx] for idx in textgrid_idx]
    print(files)
    for fl in files:
        grid = textgrids.TextGrid(path + fl)
        clip = grid['clip']
        print(clip)
        opFile = path + '/' + fl.split('.')[0] + '.csv'
        # The context manager closes the file even if a row fails to format.
        with open(opFile, 'a+', encoding='utf-8') as fid:
            heading = 'label,duration,xmin,xmax'
            fid.write(heading + '\n')
            for intval in clip:
                print(intval.text, intval.dur, intval.xmin, intval.xmax)
                values = intval.text + ',' + str(intval.dur) + ',' + str(
                    intval.xmin) + ',' + str(intval.xmax)
                fid.write(values + '\n')
def evaluate_single_file(text_folder, wav_folder, results_folder, file_name):
    """Align the sentences of a text with the segments found in its audio.

    The text ``<text_folder>/<file_name>.txt`` is split with
    ``sent_tokenize`` (the first sentence is dropped) and the segments come
    from ``extract_segments_from_file`` on ``<wav_folder>/<file_name>.wav``.
    Sentence *i* is placed between the end of segment *i-1* (0 for the first)
    and the start of segment *i*, both divided by the audio frequency. The
    grid is written to
    ``<results_folder>/silence_aligned_<file_name>.TextGrid``.

    Args:
        text_folder: Folder containing the ``.txt`` transcript.
        wav_folder: Folder containing the ``.wav`` recording.
        results_folder: Folder receiving the generated TextGrid.
        file_name: Base name shared by the transcript and the recording.
    """
    LOGGER.debug(f"Processing {file_name}")
    # Read through a context manager so the handle is closed deterministically.
    with open(os.path.join(text_folder, f"{file_name}.txt"), 'r') as text_file:
        text = " ".join(text_file.readlines())
    tokenized_text = sent_tokenize(text)
    tokenized_text = tokenized_text[1:]

    segments, audio_frequency = extract_segments_from_file(
        os.path.join(wav_folder, f"{file_name}.wav"))

    tg = textgrids.TextGrid()
    tg.xmin = 0
    tg.xmax = segments[-1][1] / audio_frequency
    tier = textgrids.Tier()
    tg[file_name] = tier

    previous_segment = 0
    # zip stops at the shorter of the two sequences, as the index loop did.
    for sentence, (xmin, xmax) in zip(tokenized_text, segments):
        tier.append(
            textgrids.Interval(sentence, previous_segment / audio_frequency,
                               xmin / audio_frequency))
        previous_segment = xmax
    tg.write(
        os.path.join(results_folder, f"silence_aligned_{file_name}.TextGrid"))
except AttributeError: print(tree) pass pw_dict = {} outfile = os.path.join(out_dir, 'word_times_' + sentence_id + '.pickle') textgrid_file = os.path.join( path_to_textgrids, sentence_id2speaker[sentence_id], sentence_id2speaker[sentence_id] + "-" + os.path.splitext(os.path.basename(file))[0].replace("_", "-") + ".TextGrid") grid = textgrids.TextGrid(textgrid_file) index = 0 # get all alignments (based on recording) alignments = [] for word in grid["words"]: label = word.text.transcode() if label != "": alignments.append(word) #alignment_words = [word.text.transcode() for word in alignments] def remove(alignments): print([(word_from_tg.text.transcode(), word_from_transcription) for word_from_tg, word_from_transcription in zip( alignments, transcription)]) if all(word_from_tg.text.transcode() == word_from_transcription for word_from_tg, word_from_transcription in zip(
def main():
    """Synthesise the first sentence of ORTHO_SENTENCES by diphone concatenation.

    For every phoneme of every word, the 'diphones' tier of the TextGrid is
    scanned for the pair (previous phoneme, current phoneme). The audio
    between the midpoints of those two intervals is extracted, its pitch,
    duration and intensity are adjusted, and the result is appended to the
    output sound, which is finally saved to RESULT_PATH_FILE. Any exception is
    caught and printed.
    """
    try:
        # create a Sound object from the audio file
        snd = parselmouth.Sound(SOUND_PATH_FILE)
        # get the sampling frequency
        frequency = snd.get_sampling_frequency()
        # get the intensity of the recording
        record_intensity = snd.get_intensity()
        # create a new Sound object that will receive the synthesis
        new_sound = call("Create Sound from formula", "fichier_synthese", 1, 0,
                         0.05, frequency, "0")
        # open the TextGrid file
        segmentation = textgrids.TextGrid(GRID_PATH_FILE)
        # sentence to pronounce (the first one by default)
        sentence = ORTHO_SENTENCES[0]
        # get the list of words of the sentence, the position of the verb,
        # the position of the coordinating conjunction (if present) and the
        # number of words
        phono_sentence, verb_offsets, conj_offset, nbr_words = convert_ortho_sentence(
            sentence)
        # get the coefficient used by the algorithms
        coefficient = get_coefficient(nbr_words)
        # for each word of the sentence
        for num_word in range(len(phono_sentence)):
            # word preceding the current one; '_' at the start because that
            # phoneme stands for the sentence-initial/final silence in the
            # TextGrid. Besides, an empty string could not be indexed.
            word_before = '_'
            # if the word is not the first of the sentence, use the word that
            # precedes it (needed for liaisons)
            if num_word != 0:
                word_before = phono_sentence[num_word - 1]
            # current word
            word = phono_sentence[num_word]
            # for each letter of the word; i is the index of the letter
            i = 0
            while i < len(word):
                middle_last_phon = middle_phon = None
                # previous phoneme
                last_text_phon = ''
                last_phon = None
                # scan all the intervals
                for j, phon in enumerate(segmentation['diphones']):
                    # first phoneme of the word
                    if i == 0:
                        # pair the last phoneme of the previous word with the
                        # first phoneme of the current word (liaisons)
                        if last_text_phon == word_before[
                                -1] and phon.text == word[i]:
                            # midpoints of the previous and current phonemes
                            middle_last_phon = last_phon.xmin + (
                                (last_phon.xmax - last_phon.xmin) / 2)
                            middle_phon = phon.xmin + (
                                (phon.xmax - phon.xmin) / 2)
                            # move on to the next phoneme and leave the for loop
                            i += 1
                            break
                    else:
                        # last phoneme of the sentence: use the diphone made of
                        # that phoneme and the final pause
                        # (the pause sign is '_' in the TextGrid)
                        if num_word == len(phono_sentence) - 1 and i == len(
                                word) - 1:
                            if last_text_phon == word[i] and phon.text == '_':
                                # midpoints of the previous and current phonemes
                                middle_last_phon = last_phon.xmin + (
                                    (last_phon.xmax - last_phon.xmin) / 2)
                                middle_phon = phon.xmin + (
                                    (phon.xmax - phon.xmin) / 2)
                                # move on to the next phoneme and leave the for loop
                                i += 1
                                break
                        else:
                            # otherwise look for the diphone inside the word
                            if last_text_phon == word[
                                    i - 1] and phon.text == word[i]:
                                # midpoints of the previous and current phonemes
                                middle_last_phon = last_phon.xmin + (
                                    (last_phon.xmax - last_phon.xmin) / 2)
                                middle_phon = phon.xmin + (
                                    (phon.xmax - phon.xmin) / 2)
                                # move on to the next phoneme and leave the for loop
                                i += 1
                                break
                    # diphone not found: move on to the next phoneme of the word
                    # (most often a liaison that is not handled)
                    if j == len(segmentation['diphones']) - 1:
                        print(
                            'AVERTISSEMENT : Un des diphones est introuvable !'
                        )
                        i += 1
                    # advance in the TextGrid: the current phoneme becomes the
                    # previous one
                    last_phon = phon
                    last_text_phon = phon.text
                # if the diphone was found in the TextGrid
                if middle_phon is not None and middle_last_phon is not None:
                    # snap each midpoint to its nearest zero crossing
                    middle_last_phon = snd.get_nearest_zero_crossing(
                        middle_last_phon, 1)
                    middle_phon = snd.get_nearest_zero_crossing(middle_phon, 1)
                    # extract the wanted diphone
                    extrait = snd.extract_part(
                        middle_last_phon, middle_phon,
                        parselmouth.WindowShape.RECTANGULAR, 1, False)
                    # create a Manipulation object to change the frequency and
                    # the duration of the extract
                    manipulation = call(extrait, "To Manipulation", 0.001, 75,
                                        600)
                    frequence = get_frequency_with_word(
                        num_word, verb_offsets, conj_offset, coefficient)
                    relative_duration = get_relative_duration_with_word(
                        num_word, verb_offsets, conj_offset, coefficient)
                    intensity = get_intensity(num_word, verb_offsets,
                                              nbr_words, record_intensity)
                    # change the fundamental frequency of the extract
                    extrait = alter_pitch(extrait, manipulation, frequence)
                    # change the duration of the extract
                    extrait = alter_duration(extrait, manipulation,
                                             relative_duration)
                    # change the intensity of the extract
                    extrait.scale_intensity(intensity)
                    # append the resulting diphone to new_sound
                    new_sound = new_sound.concatenate([new_sound, extrait])
        # save the result as a .wav file
        new_sound.save(RESULT_PATH_FILE, parselmouth.SoundFileFormat.WAV)
    # if an error occurs
    except Exception as error:
        print('Une erreur s\'est produite : {}'.format(error))
#!/usr/bin/env python3
# created by Mana ASHIDA, 12-09-2020, ver.1.0
#
# Asks for a folder, reads every *.TextGrid file directly inside it and writes
# "<folder>/durations.csv" with one "<file name>,<xmax>" row per TextGrid.

import csv
import glob
import os

import textgrids
from tqdm import tqdm

print("What is the path to the folder containing target TextGrids?")
path_in = input()
paths = glob.glob(path_in + "/*.TextGrid")

# newline="" leaves line endings to the csv module (no blank rows on Windows);
# the context manager closes the file even if a TextGrid fails to parse.
with open(path_in + "/durations.csv", "w", newline="") as f:
    writer = csv.writer(f)
    for path in tqdm(sorted(paths)):
        f_in = textgrids.TextGrid(path)
        # basename handles whichever path separator glob returned
        writer.writerow([os.path.basename(path), f_in.xmax])

# The message now names the file that is actually written.
print("durations.csv is generated in " + path_in)
def transform_textgrids(oArgs, errHandle):
    """Transform the textgrids in the input directory into one Excel sheet.

    Every file in ``oArgs['input']`` whose name contains ``.TextGrid`` is
    read. Tiers 1, 2, 5 and 6 (in file order) are walked in step with tier 1;
    for each tier-1 interval whose counterparts line up according to
    ``is_close``, a row ``child, tier1, tier2, tier5, tier6_L, tier6_N`` is
    written. The tier-6 text is split on ``/`` into a letter and a number
    part. Intervals that do not line up are reported via ``errHandle.Status``.

    Args:
        oArgs: Mapping with the keys ``input``, ``output``, ``force`` and
            ``debug``.
        errHandle: Object providing ``Status(msg)`` and ``DoError(msg)``.

    Returns:
        True when ``jasmintg.xlsx`` was saved in ``oArgs['output']``, False
        when an error occurred (it is reported through ``errHandle.DoError``).
    """

    # Initialisations
    src_ext = ".TextGrid"
    outfile_name = "jasmintg.xlsx"
    headers = ['child', 'tier1', 'tier2', 'tier5', 'tier6_L', 'tier6_N']

    try:
        dirInput = oArgs['input']
        dirOutput = oArgs['output']
        # Not used below, but still read so that a missing option is reported
        # as an error exactly as before.
        force = oArgs['force']
        debug = oArgs['debug']

        # Determine the output file
        outfile = os.path.join(dirOutput, outfile_name)

        # Start a workbook; `.active` replaces the deprecated
        # get_active_sheet().
        wb = openpyxl.Workbook()
        ws = wb.active
        ws.title = "Data"

        # Set up the column headers
        for col_num, header in enumerate(headers, start=1):
            c = ws.cell(row=1, column=col_num)
            c.value = header
            c.font = openpyxl.styles.Font(bold=True)
            # Set width to a fixed size
            ws.column_dimensions[get_column_letter(col_num)].width = 8.0

        # Walk all the files in the input
        lst_src = [
            os.path.join(dirInput, f) for f in os.listdir(dirInput)
            if os.path.isfile(os.path.join(dirInput, f)) and src_ext in f
        ]

        row_num = 1
        for file in lst_src:
            # Get the name of the child from the name of the file
            child = os.path.basename(file).replace(src_ext, "")
            # Show where we are
            errHandle.Status("child = {}".format(child))

            # Read the textgrid file
            grid = textgrids.TextGrid(file)

            # Access the tiers that we need. Checking the count first avoids
            # silently reusing the tiers of the previous file when this one
            # has fewer than six.
            tiers = list(grid.values())
            if len(tiers) < 6:
                errHandle.Status(
                    "Skipping [{}]: expected at least 6 tiers, found {}".
                    format(child, len(tiers)))
                continue
            tier1, tier2, tier5, tier6 = tiers[0], tiers[1], tiers[4], tiers[5]

            # One row is left between the output of consecutive files
            row_num += 1

            offset2 = 0
            offset5 = 0
            offset6 = 0
            # Walk through all the items in tier1
            for idx, t1 in enumerate(tier1):
                # Get the corresponding values in the other tiers
                t2, offset2 = _sync_interval(t1, tier2, idx, offset2)
                t5, offset5 = _sync_interval(t1, tier5, idx, offset5)
                t6, offset6 = _sync_interval(t1, tier6, idx, offset6)

                # Check if all tiers synchronize
                if is_close(t1, t2) and is_close(t1, t5) and is_close(t1, t6):
                    # All is well: process
                    arCombi = t6.text.split("/")
                    letter = ""
                    number = ""
                    if len(arCombi) == 1:
                        single = arCombi[0]
                        if re.match(r"^\d+$", single):
                            number = single
                        else:
                            letter = single
                    elif len(arCombi) == 2:
                        letter = arCombi[0].strip()
                        number = arCombi[1].strip()
                    # Create list of values
                    row = [child, t1.text, t2.text, t5.text, letter, number]
                    for col_idx, value in enumerate(row, start=1):
                        cell_this = ws.cell(row=row_num, column=col_idx)
                        cell_this.value = value
                        cell_this.alignment = openpyxl.styles.Alignment(
                            wrap_text=False)
                    # We are going to the next row
                    row_num += 1
                else:
                    # Synchronization problem
                    msg = "Synchronization problem in [{}] tier {} t1={} t2={} t5={} t6={}".format(
                        child, idx, t1.xmin, t2.xmin, t5.xmin, t6.xmin)
                    errHandle.Status(msg)

        # Save the result
        wb.save(outfile)
        return True
    except Exception:
        # Exception (not a bare except) so Ctrl-C and SystemExit still
        # propagate.
        errHandle.DoError("transform_textgrids")
        return False


def _sync_interval(reference, tier, idx, offset):
    """Return ``(interval, offset)`` for the interval of ``tier`` nearest ``reference``.

    Starts at ``idx + offset`` (clamped to the tier bounds), moves forward
    while the candidate starts before ``reference``, then backward while it
    starts after it. Stops as soon as ``is_close`` accepts the candidate or an
    end of the tier is reached.
    """
    last = len(tier) - 1
    pos = min(max(idx + offset, 0), last)
    candidate = tier[pos]
    while (not is_close(reference, candidate)
           and candidate.xmin < reference.xmin and pos < last):
        pos += 1
        candidate = tier[pos]
    # Stop at the first interval instead of wrapping to negative indices
    while (not is_close(reference, candidate)
           and candidate.xmin > reference.xmin and pos > 0):
        pos -= 1
        candidate = tier[pos]
    return candidate, pos - idx
import textgrids

# ----------------------------------------------------------
# Input TextGrid from the command line: <filename> <tiername>
if len(sys.argv) < 3:
    print("Usage:", sys.argv[0], '<filename> <tiername>')
    sys.exit()

fname = sys.argv[1]
tname = sys.argv[2]
if not os.path.isfile(fname):
    print("File", fname, "does not exist.")
    sys.exit()

try:
    grid = textgrids.TextGrid(fname)
    print("Reading the Textgrid file..." + fname)
except (textgrids.ParseError, textgrids.BinaryError):
    print("Not a recognised file format!", file=sys.stderr)
    # Without a parsed grid nothing below can run; stop here instead of
    # failing later with a NameError on `grid`.
    sys.exit(1)

# Name without its extension; splitext also copes with paths such as
# "./file.TextGrid", for which split('.')[0] returned an empty string.
fname = os.path.splitext(sys.argv[1])[0]

# Build one tab-separated line per interval of the requested tier:
# label, xmin, xmax, duration, file name.
rows = []
for syll in grid[tname]:
    label = syll.text.transcode()
    smin = '{:.4f}'.format(syll.xmin)
    smax = '{:.4f}'.format(syll.xmax)
    sdur = '{:.4f}'.format(syll.dur)
    interval = label + '\t' + smin + '\t' + smax + '\t' + sdur + '\t' + fname + '\n'
    rows.append(interval)
tier = ''.join(rows)