def get_corr_pos(validdf, tg):
    word = []
    phone = []
    pos = []
    pm = audiolabel.LabelManager(from_file=tg, from_type='praat')
    # iterate over every row; range(0, len(validdf) - 1) would silently
    # drop the last row and leave NaN in the concatenated columns
    for i in range(len(validdf)):
        synctime = validdf.time[i]
        labword = pm.tier('word').label_at(synctime).text
        labmatch = pm.tier('phone').label_at(synctime).text
        labt1 = float(pm.tier('phone').label_at(synctime).t1)
        labt2 = float(pm.tier('phone').label_at(synctime).t2)
        labperc = float((synctime - labt1) / (labt2 - labt1))
        phone.append(labmatch)
        pos.append(labperc)
        word.append(labword)
    posdf = pd.concat([validdf, pd.DataFrame({'phone': phone})],
                      ignore_index=False, axis=1)
    posdf = pd.concat([posdf, pd.DataFrame({'pos': pos})],
                      ignore_index=False, axis=1)
    posdf = pd.concat([posdf, pd.DataFrame({'word': word})],
                      ignore_index=False, axis=1)
    return posdf
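# Hedged usage sketch (not part of the original sources): get_corr_pos
# expects a DataFrame with a 'time' column in seconds and a TextGrid with
# 'word' and 'phone' tiers. 'points.csv' and 'demo.TextGrid' are
# hypothetical file names.
import audiolabel
import pandas as pd

validdf = pd.read_csv('points.csv')             # must contain a 'time' column
posdf = get_corr_pos(validdf, 'demo.TextGrid')  # adds phone, pos, word columns
print(posdf[['time', 'phone', 'pos', 'word']].head())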
def newWord():
    hasNull = True
    while hasNull:
        rand = random.choice(wordTasks)
        # compile the target word as a regex; keep the name distinct from
        # the loop variable below so the pattern isn't shadowed
        word_re = re.compile(rand[0].upper())
        task = jwdir + rand[1]
        tg = task + '.TextGrid'
        wav = task + '.wav'
        txy = task + '.txy'
        pm = al.LabelManager(from_file=tg, from_type='praat')
        df = pd.read_csv(txy, sep='\t', na_values="1000000",
                         names=['time', 'ULx', 'ULy', 'LLx', 'LLy',
                                'T1x', 'T1y', 'T2x', 'T2y', 'T3x', 'T3y',
                                'T4x', 'T4y', 'MIx', 'MIy', 'MMx', 'MMy'])
        df = df * 1e-3                   # convert xy values to mm
        df['time'] = df['time'] * 1e-3   # convert time to seconds
        df['sec'] = df['time']
        df = df.set_index(['sec'])
        for word in pm.tier('word').search(word_re):
            t1idx = df.index.get_loc(word.t1 - .1, method='nearest')
            t2idx = df.index.get_loc(word.t2 + .1, method='nearest')
            sel = df.iloc[t1idx:t2idx]
            hasNull = sel.isnull().values.any()
        setGlobal(sel, rand[0])
def test_LabelManager_params():
    lm = audiolabel.LabelManager(
        from_file='test/this_is_a_label_file.short.TextGrid',
        from_type='praat')
    assert len(lm._tiers) == 3
    assert lm.names == ('word', 'phone', 'stimulus')
    assert '{:0.4f}'.format(lm.tier(0)[1].t1) == '0.0453'
    lm = audiolabel.LabelManager(
        from_file='test/this_is_a_label_file.short.TextGrid',
        from_type='praat',
        names=['sec', 'rms', 'f1'],
        scale_by=100,
        shift_by=10)
    assert len(lm._tiers) == 3
    assert lm.names == ('sec', 'rms', 'f1')
    # shift_by is in the time units after scale_by is applied
    assert '{:0.4f}'.format(lm.tier(0)[1].t1) == '14.5320'
def sync_lm(self):
    """The LabelManager for .sync.textgrid."""
    lm = self._sync_lm
    if lm is None:
        lm = audiolabel.LabelManager(from_file=self.abs_sync_tg,
                                     from_type='praat')
        self._sync_lm = lm
    return lm
def get_datadf_2der(rawUSinput, au, syncloc, maxhz=300):
    frame_times = audiolabel.LabelManager(from_file=syncloc,
                                          from_type='table',
                                          t1_col='seconds').as_df()[1]
    frame_times = frame_times.rename(columns={'text': 'frameN', 't1': 'time'})
    if isinstance(rawUSinput, np.ndarray):
        frames = rawUSinput
    else:
        frames = [rawUSinput.get_frame(i) for i in range(rawUSinput.nframes)]
    frames_diff = [np.mean(np.abs(frames[i] - frames[i - 1]))
                   for i in range(1, len(frames))]
    frame_times['us_diff'] = frame_times['frameN'].apply(
        lambda x: frames_diff[int(x) - 1]
        if (x != 'NA' and int(x) > 0) else np.nan)
    for i in range(1, len(frame_times)):
        if frame_times['frameN'][i - 1] == 'NA':
            frame_times.loc[i, 'us_diff'] = np.nan
    if maxhz > 0:
        # filter out voicing
        au = pcall(au, 'Filter (stop Hann band)...', 0, maxhz, 100)
    pmfcc = au.to_mfcc()
    # transpose to get time (frames) on the first dimension
    mfcc = np.transpose(pmfcc.to_array())
    au_diff = [np.mean(np.abs(mfcc[i] - mfcc[i - 1]))
               for i in range(1, len(mfcc))]
    frame_times['au_diff'] = frame_times.time.apply(
        lambda x: au_diff[int(pmfcc.get_frame_number_from_time(x) + 1)])
    pmint = au.to_intensity()
    frame_times['au_int'] = frame_times.time.apply(
        lambda x: pmint.get_value(x))
    us_acc = [frame_times['us_diff'][i] - frame_times['us_diff'][i - 1]
              for i in range(2, len(frame_times))]
    frame_times['us_acc'] = frame_times['frameN'].apply(
        lambda x: us_acc[int(x) - 1]
        if (x != 'NA' and int(x) > 1) else np.nan)
    au_acc = [frame_times['au_diff'][i] - frame_times['au_diff'][i - 1]
              for i in range(1, len(frame_times))]
    frame_times['au_acc'] = np.nan
    # avoid chained assignment; au_acc has len(frame_times) - 1 entries
    frame_times.loc[1:, 'au_acc'] = au_acc
    return frame_times
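# Hedged usage sketch (not from the original): assumes parselmouth is
# installed, that `pcall` wraps parselmouth.praat.call, and that the sync
# table has a 'seconds' column. File names are hypothetical.
import numpy as np
import parselmouth

au = parselmouth.Sound('acq.ch1.wav')   # hypothetical audio file
frames = np.load('acq_frames.npy')      # hypothetical (nframes, h, w) array
df = get_datadf_2der(frames, au, 'acq.sync.txt', maxhz=300)
print(df[['time', 'us_diff', 'au_diff', 'us_acc', 'au_acc']].head())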
def test_table():
    lm = audiolabel.LabelManager(from_file='test/sample.table',
                                 from_type='table',
                                 fields_in_head=True,
                                 t1_col='sec')
    assert len(lm._tiers) == 6
    assert lm.tier('rms')[0].text == '7.1'
    rms0 = lm.tier('rms')[0]
    assert lm.tier('rms').next(rms0).text == '7.4'
    assert lm.tier('rms').next(rms0, 2).text == '7.3'
def test_praat_short_empty_tier():
    lm = audiolabel.LabelManager(from_file='test/empty_tier.short.TextGrid',
                                 from_type='praat')
    assert len(lm._tiers) == 5
    assert lm.names == ('V1', 'empty_point_1', 'empty_interval', 'V2',
                        'empty_point_end')
    assert len(lm.tier('V1')) == 3
    assert len(lm.tier('empty_point_1')) == 0
    assert len(lm.tier('empty_interval')) == 0
    assert len(lm.tier('V2')) == 4
    assert len(lm.tier('empty_point_end')) == 0
def test_names_property():
    lm = audiolabel.LabelManager(
        from_file='test/this_is_a_label_file.short.TextGrid',
        from_type='praat')
    assert lm.names == ('word', 'phone', 'stimulus')
    lm.names = ['sec', 'rms']
    assert lm.names == ('sec', 'rms', 'stimulus')
    lm.names = [None, 'two']
    assert lm.names == ('sec', 'two', 'stimulus')
    lm.names = ['one', 'two', 'three']
    assert lm.names == ('one', 'two', 'three')
def test_praat_utf_16_be_warn():
    sys.stderr.write(
        '''The following two lines should be repeated after the asterisks:
WARNING: overriding user-specified encoding utf-8.
Found BOM for utf_16_be encoding.
***************
''')
    lm = audiolabel.LabelManager(
        from_file='test/Turkmen_NA_20130919_G_3.TextGrid',
        from_type='praat',
        codec='utf-8')
def test_table_pipe():
    # First, create table from file.
    lm_file = audiolabel.LabelManager(from_file='test/pplain.table',
                                      from_type='table',
                                      sep=' ',
                                      fields_in_head=False,
                                      fields='f0,is_voiced,rms,acpeak',
                                      t1_col=None,
                                      t1_start=0.0,
                                      t1_step=0.010)
    assert len(lm_file._tiers) == 4
    assert lm_file.tier('rms')[0].text == '0'
    rms0 = lm_file.tier('rms')[0]
    assert lm_file.tier('rms').next(rms0).text == '28.0572'
    assert lm_file.tier('rms').next(rms0, 2).text == '47.6023'
    # Second, create table from the same file, using a subprocess.
    cat_proc = subprocess.Popen(['cat', 'test/pplain.table'],
                                stdout=subprocess.PIPE)
    lm_proc = audiolabel.LabelManager(from_file=cat_proc.stdout,
                                      from_type='table',
                                      sep=' ',
                                      fields_in_head=False,
                                      fields='f0,is_voiced,rms,acpeak',
                                      t1_col=None,
                                      t1_start=0.0,
                                      t1_step=0.010)
    assert len(lm_proc._tiers) == 4
    assert lm_proc.tier('rms')[0].text == '0'
    rms0 = lm_proc.tier('rms')[0]
    assert lm_proc.tier('rms').next(rms0).text == '28.0572'
    assert lm_proc.tier('rms').next(rms0, 2).text == '47.6023'
    # Third, make sure the results are the same.
    for name in lm_file.names:
        tier_file = lm_file.tier(name)
        tier_proc = lm_proc.tier(name)
        for (l_file, l_proc) in zip(tier_file, tier_proc):
            assert l_file.t1 == l_proc.t1
            assert l_file.t2 == l_proc.t2
            assert l_file.text == l_proc.text
def gather(self, params_file='params.cfg'):
    """Gather the metadata from an acquisition directory."""
    if self.dtype == 'bpr':
        # read the header inside the try so a failed read leaves the
        # attributes set to None instead of raising a NameError
        try:
            rdr = self.image_reader
            self.n_frames = rdr.header.nframes
            self.image_h = rdr.header.h
            self.image_w = rdr.header.w
            self._probe = rdr.header.probe
        except Exception:
            self.n_frames = None
            self.image_h = None
            self.image_w = None
            self._probe = None
    else:
        raise AcqError("Unknown type '{:}' specified.".format(self.dtype))
    try:
        self.imaging_params = read_params(
            os.path.join(self.abspath, params_file))
    except IOError:
        self.imaging_params = None
    try:
        with open(self.abs_versions_file) as f:
            self.versions = f.read()
    except IOError:
        self.versions = None
    try:
        with open(self.abs_stim_file) as f:
            self.stimulus = f.read()
    except IOError:
        self.stimulus = None
    try:
        tg = self.abs_sync_tg
        lm = audiolabel.LabelManager(from_file=tg, from_type='praat')
        durs = [l.duration for l in lm.tier('pulse_idx').search(r'^\d+$')]
        self.n_pulse_idx = len(durs)
        self.n_raw_data_idx = len(
            [l for l in lm.tier('raw_data_idx').search(r'^\d+$')])
        self.pulse_max = np.max(durs)
        self.pulse_min = np.min(durs)
    except IOError:
        self.n_pulse_idx = None
        self.n_raw_data_idx = None
        self.pulse_max = None
        self.pulse_min = None
def test_table_pipe_newlines():
    '''Test reading from table using subprocess with universal_newlines.'''
    cat_proc = subprocess.Popen(['cat', 'test/pplain.table'],
                                stdout=subprocess.PIPE,
                                universal_newlines=True)
    lm_proc = audiolabel.LabelManager(from_file=cat_proc.stdout,
                                      from_type='table',
                                      sep=' ',
                                      fields_in_head=False,
                                      fields='f0,is_voiced,rms,acpeak',
                                      t1_col=None,
                                      t1_start=0.0,
                                      t1_step=0.010)
    assert len(lm_proc._tiers) == 4
    assert lm_proc.tier('rms')[0].text == '0'
    rms0 = lm_proc.tier('rms')[0]
    assert lm_proc.tier('rms').next(rms0).text == '28.0572'
    assert lm_proc.tier('rms').next(rms0, 2).text == '47.6023'
def test_initialization():
    l1 = audiolabel.Label('first', 1.0, 2.0)
    l2 = audiolabel.Label('second', 2.0, 3.0)
    l3 = audiolabel.Label('third', 3.0, 4.0)
    assert l1.text == 'first'
    assert l1.t1 == 1.0
    assert l1.t2 == 2.0
    t1 = audiolabel.IntervalTier(name='tier1')
    t1.add(l1)
    t1.add(l2)
    t1.add(l3)
    assert t1.label_at(2.5) == l2
    assert t1[0] == l1
    assert t1[-1] == l3
    lm = audiolabel.LabelManager()
    lm.add(t1)
    assert lm.tier('tier1') == t1
    assert t1.next(l1) == l2
    assert t1.next(l1, skip=1) == l3
    assert t1.prev(l3) == l2
    assert t1.prev(l3, skip=1) == l1
        subs[i]) and (os.path.isfile(syncfile)) and (
            os.path.isfile(stimfile)) and (os.path.isfile(wavfile)):
    stim = open(stimfile)
    stimtext = stim.read()
    for w in WORDS:
        w_reg = re.escape(w) + r"$"
        if w and re.findall(w_reg, stimtext):
            if not os.path.isfile(outfile):
                proc = subprocess.check_call(
                    ['pyalign', wavfile, stimfile, outfile])
            fname = os.path.splitext(outfile)[0]
            pm = audiolabel.LabelManager(from_file=outfile,
                                         from_type='praat')
            for lab in pm.tier('phone'):
                if re.match(TAR, lab.text):
                    label = lab.text
                    t1 = lab.t1
                    t2 = lab.t2
                    #mid = (t2+t1)/2
                    if s < 128:
                        sm = audiolabel.LabelManager(
                            from_file=syncfile,
                            from_type='table',
                            fields_in_head=False,
                            fields='t1,frameidx')
                        # this assumes all the bprs have already been
                        # converted to bmps.
conv_frame = e.acquisitions[0].image_reader.get_frame(0)
conv_img = test.image_converter.as_bmp(conv_frame)
for idx, a in enumerate(e.acquisitions):
    if frames is None:
        if args.convert:
            frames = np.empty([len(e.acquisitions)] + list(conv_img.shape))
        else:
            frames = np.empty([len(e.acquisitions)] +
                              list(a.image_reader.get_frame(0).shape)) * np.nan
    a.gather()
    tg = str(a.abs_image_file + ".ch1.TextGrid")
    pm = audiolabel.LabelManager(from_file=tg, from_type="praat")
    print(pm)
    # return last V = target V
    v, m = pm.tier('phone').search(vre, return_match=True)[-1]
    print(v)
    # print('here comes m')
    # print(m)
    #print(pm.tier('phone'))
    word = pm.tier('word').label_at(v.center).text
    print(word)
    # print(phone)
    # phase.append(a.runvars.phase)
    trial.append(idx)
    tstamp.append(a.timestamp)
    phone.append(v.text)
else: tone = "high" # get condition ("ba"/"init"), place, aspiration (in that order in md) # replace with your own metadata as needed md = os.path.join(parent,"meta.txt") with open(md) as csvfile: meta = reader(csvfile, delimiter="\t") tbl = [row for row in meta] # a little janky but works condition = tbl[1][0] place = tbl[1][1] aspiration = tbl[1][2] # instantiate audio TG handler; get release time and phone label # assumes one labeled interval on the searched tier pm = audiolabel.LabelManager(from_file=tg, from_type='praat') clos = pm.tier('closure').search(".", return_match=True)[0][0] # alternately, search for a set: (see lines 56-60) # v = pm.tier('segments').search(vow, return_match=True)[0][0] release_time = clos.t2 phone = clos.text # get token number token_ct.append(phone) token = token_ct.count(phone) # instantiate sync TG handler; get absolute index of frame sc = audiolabel.LabelManager(from_file=sync_tg, from_type='praat') release_idx = sc.tier('pulse_idx').label_at(release_time).text abs_fr_idx = int(release_idx) - 1 # should yield 121 for first file
0.02, 44100, "0") for wave_file in glob.glob(glob_regexp): parent = os.path.dirname(wave_file) condition = os.path.dirname(parent) # skip landmark/practice trials stimfile = os.path.join(parent, "stim.txt") # this is lower-case stim = read_stimfile(stimfile).upper() # makes it upper-case if stim.lower() not in wrds: continue # define other files of interest acq = os.path.split(parent)[1] tg_handle = os.path.join(parent, str(acq + ".ch1.TextGrid")) tg = audiolabel.LabelManager(from_file=tg_handle, from_type='praat') sound = parselmouth.Sound(wave_file) sound = sound.resample(44100) matches = tg.tier('words').search(word_regexp) if len(matches) > 1: print("Multiple tokens of {} in {}, skipping!".format(stim, acq)) continue match = matches[0] # take first item (only item) from the match list labels = tg.tier('phones').tslice(t1=match.t1, t2=match.t2) # remove intervals surrounding word, which are included in tslice phones = labels[1:-1] # get last two intervals and check
# skip over other acoustics folders
if parent.endswith("sauce"):
    #print("Skipping sauce")
    continue
# skip landmark/practice trials
stimfile = os.path.join(parent, "stim.txt")
stim = read_stimfile(stimfile)
#print(stim)
if stim in skip_set:
    continue
# define other files of interest
acq = os.path.split(parent)[1]
tg_handle = os.path.join(parent, str(acq + ".ch1.TextGrid"))
tg = audiolabel.LabelManager(from_file=tg_handle, from_type='praat')
for f in tg.tier('phone'):
    # skip any target segments not in a word in the target list
    if f.text not in target_segments:
        continue
    # adjust labels to disambiguate
    pron = tg.tier('word').label_at(f.center).text
    if pron in target_list:
        if f.text == "IY1":
            if pron in iz_list:
                # change if IZ
                f.text = "IZ1"
            elif pron == "YZ" or pron == "XYZ":
                f.text = "YZ1"
            elif pron == "SIEX" or pron == "XIEX":
def test_praat_short_generic_fromtype():
    lm = audiolabel.LabelManager(
        from_file='test/this_is_a_label_file.short.TextGrid',
        from_type='praat')
    assert len(lm._tiers) == 3
    assert lm.names == ('word', 'phone', 'stimulus')
def test_as_string_praat_long():
    '''Test output of as_string(). Make sure to handle quotation marks.'''
    lm = audiolabel.LabelManager(from_file='test/quotes.TextGrid',
                                 from_type='praat')
    s = 'File type = "ooTextFile"\nObject class = "TextGrid"\n\nxmin = 0.00000000000000000000\nxmax = 1.00000000000000000000\ntiers? <exists>\nsize = 2\nitem []:\n item [1]:\n class = "IntervalTier"\n name = "interval"\n xmin = 0.000000000000\n xmax = 1.000000000000\n intervals: size = 2\n intervals [1]:\n xmin = 0.00000000000000000000\n xmax = 0.50000000000000000000\n text = """a\'b\'c""d e"""\n intervals [2]:\n xmin = 0.50000000000000000000\n xmax = 1.00000000000000000000\n text = ""\n item [2]:\n class = "TextTier"\n name = "point"\n xmin = 0.000000000000\n xmax = 1.000000000000\n points: size = 1\n points [1]:\n number = 0.50000000000000000000\n mark = """a\'b\'c""d e"""'
    assert s == lm.as_string('praat_long')
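# Hedged sketch (not in the original tests): as_string() can also round-trip
# a TextGrid back to disk; the output path is hypothetical.
lm = audiolabel.LabelManager(from_file='test/quotes.TextGrid',
                             from_type='praat')
with open('quotes_copy.TextGrid', 'w') as out:
    out.write(lm.as_string('praat_long'))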
def processwav(wav, tg):
    # wav = path to the wav file in dir
    # load the file at the sampling rate you want
    f, sr = librosa.load(wav, sr=12000)
    # open the TextGrid
    pm = audiolabel.LabelManager(from_file=os.path.join(dirpath, tg),
                                 from_type="praat")
    for word in pm.tier('Word').search(words):
        t1_idx = np.floor(word.t1 * sr)  # convert time to integer index
        t2_idx = np.floor(word.t2 * sr)
        snippet = f[int(t1_idx):int(t2_idx)]
        snippet_pm = pm.tier('Phone').tslice(word.t1, word.t2,
                                             lincl=False, rincl=False)
        # option to apply pre-emphasis
        #emp_f = np.append(f[0], f[1:] - 0.97 * f[:-1])
        # get the spectrum
        FFT = librosa.stft(snippet, n_fft=n_fft, hop_length=hop,
                           win_length=window)
        # convolve the filterbank over the spectrum
        S = mel_f.dot(np.abs(FFT))

        def get_spectrum(S, t1, t2, step_size=step_size, plot=False,
                         label='c'):
            # np.int is deprecated; the builtin int works the same here
            start_frame = int(t1 / step_size)
            end_frame = int(t2 / step_size)
            mean_mel_spectrum = np.mean(np.log(S[:, start_frame:end_frame]),
                                        axis=1)
            return mean_mel_spectrum

        # loop through all of the (specified) labels on the "Phone" tier
        # of the current word
        for v in snippet_pm:
            if re.match(segments, v.text):
                t1 = v.t1 - word.t1
                t2 = v.t2 - word.t1
                spectrum = get_spectrum(S, t1, t2, step_size=step_size)
                # option to add a for loop here that appends each of the
                # following measurements for every spectral measurement
                # in 'spectrum'
                speakerlist.append(wav.split("_", 1)[0])
                agelist.append(wav.split("_", 2)[1])
                filenamelist.append(wav)
                phonelist.append(pm.tier('Phone').label_at(v.center).text)
                vectorlist.append(spectrum)
                followinglist.append(pm.tier('Phone').next(v).text)
                prevlist.append(pm.tier('Phone').prev(v).text)
                wordlist.append(pm.tier('Word').label_at(v.center).text)
                notelist.append(pm.tier('Notes').label_at(v.center).text)
                t1list.append(v.t1)
                t2list.append(v.t2)
                durlist.append(v.t2 - v.t1)
                #morphlist.append(pm.tier('Morpheme').label_at(v.center).text)
                worddurlist.append(word.t2 - word.t1)
    # normalize by speaking rate
    normlist = [x / (word.t2 - word.t1) for x in vectorlist]
    df = pd.DataFrame(
        OrderedDict((
            ('Speaker', pd.Series(speakerlist)),
            ('Age', pd.Series(agelist)),
            ('Filename', pd.Series(filenamelist)),
            ('Phone', pd.Series(phonelist)),
            ('Spectrum', pd.Series(vectorlist)),
            ('Previous', pd.Series(prevlist)),
            ('Following', pd.Series(followinglist)),
            ('Word', pd.Series(wordlist)),
            ('Note', pd.Series(notelist)),
            ('phone_t1', pd.Series(t1list)),
            ('phone_t2', pd.Series(t2list)),
            ('Phone_duration', pd.Series(durlist)),
            ('Word_duration', pd.Series(worddurlist)),
            ('Normalized_Spectrum', pd.Series(normlist)))))
    df.to_csv(os.path.splitext(soundfile)[0] + '.mel_spectrum.csv',
              encoding='utf-8')
# print(tp)
stimfile = a.abs_stim_file
stim = open(stimfile)
stimtext = stim.read()
# outfile = a.abspath+'/'+tp+'.TextGrid'
outfile = expdir + '/' + tp + '/' + tp + '.TextGrid'
print(outfile)
bprtg = expdir + '/' + tp + '/' + tp + '.bpr.sync.TextGrid'
if not os.path.isfile(bprtg):
    # print before continuing; after continue this line was unreachable
    print('no bpr sync')
    continue
print(outfile)
print('gonna give you')
fname = os.path.splitext(outfile)[0]
print(fname)
pm = audiolabel.LabelManager(from_file=outfile, from_type='praat')
for plab in pm.tier('phone'):
    if plab.text == 'R':
        print(plab.text)
        frt1 = plab.t1
        # print frt1
        # print 'that was frt1'
        frt2 = plab.t2
        for l in a.pulse_idx.tslice(t1=frt1, t2=frt2):
            # l should look something like
            # Label( t1=0.3947, t2=0.4035, text='b'21'' )
            print(l)
            try:
                d = a.frame_at(l.t1, convert=True)
                if d is None:
                    continue
def extract_frames(expdir, list_filename=None, frames=None):
    """Extract image frames from specified acquisitions and return as a numpy
    array and dataframe with associated metadata.

    list_filename = filename containing a list of tuple triples, as in frames
    frames = list of tuple triples containing an acquisition timestamp string,
             a raw_data_idx frame index, and data type (default is 'bpr')
    expdir = the root experiment data directory

    Returns an (np.array, pd.DataFrame) tuple in which the array contains the
    frames of image data and the DataFrame contains acquisition metadata. The
    rows of the DataFrame correspond to the first axis of the array.
    """
    fields = ['stimulus', 'timestamp', 'utcoffset', 'versions', 'n_pulse_idx',
              'n_raw_data_idx', 'pulse_max', 'pulse_min', 'imaging_params',
              'n_frames', 'image_w', 'image_h', 'probe']
    if list_filename is not None:
        frames = pd.read_csv(list_filename, sep=r'\s+', header=None)
    else:
        frames = pd.DataFrame.from_records(frames)
    if frames.shape[1] == 2:
        frames['dtype'] = 'bpr'
    frames.columns = ['tstamp', 'fr_id', 'dtype']
    rows = []
    data = None
    for idx, rec in frames.iterrows():
        a = ultratils.acq.Acq(timestamp=rec['tstamp'], expdir=expdir,
                              dtype=rec['dtype'])
        a.gather()
        if idx == 0:
            for v in a.runtime_vars:
                fields.insert(0, v.name)
        if rec['dtype'] == 'bpr':
            rdr = BprReader(a.abs_image_file)
        else:
            raise AcqError('Only bpr data is supported.')
        # Initialize array with NaN on first pass.
        if data is None:
            data = np.zeros([len(frames), rdr.header.h, rdr.header.w]) * np.nan
        # Assume fr_id is a raw_data_idx if it's an integer; otherwise it's
        # a time.
        try:
            if 'fr_id' in frames.select_dtypes(include=['integer']).columns:
                fr_idx = rec['fr_id']
            else:
                lm = audiolabel.LabelManager(from_file=a.abs_sync_tg,
                                             from_type='praat')
                fr_idx = int(
                    lm.tier('raw_data_idx').label_at(rec['fr_id']).text)
            data[idx] = rdr.get_frame(fr_idx)
        except Exception:
            fr_idx = None
        row = a.as_dict(fields)
        row['raw_data_idx'] = fr_idx
        rows.append(row)
    return (data, pd.DataFrame.from_records(rows))
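# Hedged usage sketch for extract_frames (illustrative; the experiment
# directory and timestamps are hypothetical): pull two bpr frames by
# raw_data_idx and inspect the metadata that comes back.
frame_list = [('2015-05-11T103000-0700', 42, 'bpr'),
              ('2015-05-11T103200-0700', 17, 'bpr')]
data, md = extract_frames('/path/to/expdir', frames=frame_list)
print(data.shape)        # (2, image_h, image_w)
print(md[['timestamp', 'stimulus', 'raw_data_idx']])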
def processwav(wav, tg):
    # wav = path to the wav file in dir
    # load the file at the sampling rate you want
    f, sr = librosa.load(wav, sr=12000)
    # open the TextGrid
    pm = audiolabel.LabelManager(from_file=os.path.join(dirpath, tg),
                                 from_type="praat")
    for word in pm.tier('Word').search(words):
        t1_idx = np.floor(word.t1 * sr)  # convert time to integer index
        t2_idx = np.floor(word.t2 * sr)
        # cast to int: slicing a numpy array with floats raises a TypeError
        snippet = f[int(t1_idx):int(t2_idx)]
        snippet_pm = pm.tier('Phone').tslice(word.t1, word.t2,
                                             lincl=False, rincl=False)
        # option to apply pre-emphasis
        #emp_f = np.append(f[0], f[1:] - 0.97 * f[:-1])
        # get the spectrogram
        FFT = librosa.stft(snippet, n_fft=n_fft, hop_length=hop,
                           win_length=window)
        # convolve the filterbank over the spectrogram; note that log is
        # moved up here; log freq is only calculated with the mean in the
        # function below for other scripts
        S = np.log(mel_f.dot(np.abs(FFT)))

        def get_vowel_spectrum(S, t1, t2, step_size=step_size, plot=False,
                               label='c'):
            start_frame = int(t1 / step_size)
            end_frame = int(t2 / step_size)
            # frames of log mel filterbank energies
            frame = S[:, start_frame:end_frame]
            return frame

        # loop through all of the (specified) labels on the "Phone" tier
        # of the current word
        for v in snippet_pm:
            if re.match(segments, v.text):
                #if v.t1 == word.t1 or pm.tier('Phone').prev(v).t1 == word.t1:
                #    # only measure the first two segments of the word
                #print(v.text, word.text)
                t1 = v.t1 - word.t1
                t2 = v.t2 - word.t1
                # some general time range of the vowel's associated word
                spectrum = get_vowel_spectrum(S, t1, t2, step_size=step_size)
                for meas in spectrum:
                    #print(meas)
                    speakerlist.append(wav.split("_", 1)[1])
                    phonelist.append(pm.tier('Phone').label_at(v.center).text)
                    # get vectors for each frame of the phone
                    framelist.append(meas)
                    #followinglist.append(pm.tier('Phone').next(v).text)
                    prevlist.append(pm.tier('Phone').prev(v).text)
                    wordlist.append(pm.tier('Word').label_at(v.center).text)
                    target1score.append(
                        pm.tier('Target1Seg').label_at(v.center).text)
                    target2score.append(
                        pm.tier('Target2Seg').label_at(v.center).text)
                    prosodyscore.append(
                        pm.tier('ProsodyScore').label_at(v.center).text)
                    # not a great method; will break for more labels (albeit
                    # there are few examples of this)
                    for note in pm.tier('TransNotes'):
                        if word.t1 <= note.t1 <= word.t2:
                            notelist.append(note.text)
                        else:
                            notelist.append('')
                    triallist.append(pm.tier('Trial').label_at(v.center).text)
                    t1list.append(v.t1)
                    t2list.append(v.t2)
                    durlist.append(v.t2 - v.t1)
                    worddurlist.append(word.t2 - word.t1)
    # normalize each MFCC vector (varies by length of phone, 11-14) by
    # word duration
    normframelist = [x / (word.t2 - word.t1) for x in framelist]
    df = pd.DataFrame(
        OrderedDict((
            ('Speaker', pd.Series(speakerlist)),
            ('Phone', pd.Series(phonelist)),
            ('Frame', pd.Series(framelist)),
            ('Previous', pd.Series(prevlist)),
            #('Following', pd.Series(followinglist)),
            ('Word', pd.Series(wordlist)),
            ('Target1Seg', pd.Series(target1score)),
            ('Target2Seg', pd.Series(target2score)),
            ('ProsodyScore', pd.Series(prosodyscore)),
            ('Note', pd.Series(notelist)),
            ('Trial', pd.Series(triallist)),
            ('phone_t1', pd.Series(t1list)),
            ('phone_t2', pd.Series(t2list)),
            ('Phone_duration', pd.Series(durlist)),
            ('Word_duration', pd.Series(worddurlist)),
            ('Normalized_Frames', pd.Series(normframelist)))))
    df.to_csv('nwr_mfcc_complete.csv', encoding='utf-8')
        except ValueError:
            pass  # ignore line if a header is present
# get first frame index
bmp_list = glob.glob(os.path.join(dirs, '*.bmp'))
fr_idx = []
for bmp in bmp_list:
    fr_num = re.search(r'\.(\d+)\.bmp$', bmp)
    # the int is crucial here: otherwise the min of a list of strings
    # would be returned first
    fr_idx.append(int(fr_num.group(1)))
first_fr = min(fr_idx)
last_fr = max(fr_idx)
# instantiate LabelManager
pm = audiolabel.LabelManager(from_file=os.path.join(dirs, textgrid),
                             from_type='praat')
# for all relevant vowel intervals
for v, m in pm.tier(0).search(vow, return_match=True):
    # skip any tokens from non-target words
    pron = pm.tier(1).label_at(v.center).text
    if pron not in ["BUH", "FUH", "BUW", "BOOW", "BAAE", "BIY"]:
        continue
    # correct UH1 vowel depending on pronunciation FUH or BUH
    elif pron == "FUH":  # TODO handle occasional FUW
        phone = "VU"
    elif pron == "BUH":
        phone = "BU"
    elif pron == "BOOW":
        phone = "UW"
def processwav(wav, tg):
    # wav = path to the wav file in dir
    # load the file at the sampling rate you want
    f, sr = librosa.load(wav, sr=12000)
    # open the TextGrid
    pm = audiolabel.LabelManager(from_file=os.path.join(dirpath, tg),
                                 from_type="praat")
    for word in pm.tier('Word').search(words):
        t1_idx = np.floor(word.t1 * sr)  # convert time to integer index
        t2_idx = np.floor(word.t2 * sr)
        snippet = f[int(t1_idx):int(t2_idx)]
        snippet_pm = pm.tier('Phone').tslice(word.t1, word.t2,
                                             lincl=False, rincl=False)
        # get the spectrum
        FFT = librosa.stft(snippet, n_fft=n_fft, hop_length=hop,
                           win_length=window)
        # convolve the filterbank over the spectrum
        S = mel_f.dot(np.abs(FFT))

        # we average over the entire consonant
        def get_spectrum(S, t1, t2, step_size=step_size, plot=False,
                         label='c'):
            start_frame = int(t1 / step_size)
            end_frame = int(t2 / step_size)
            mean_mel_spectrum = np.mean(np.log(S[:, start_frame:end_frame]),
                                        axis=1)
            return mean_mel_spectrum

        def get_transition_duration(S, s1, s2, word, step_size, plot=False):
            print(word.text)
            s1start = s1.t1 - word.t1
            s1end = s1.t2 - word.t1
            s1mid = (s1end + s1start) / 2
            s2start = s2.t1 - word.t1
            s2end = s2.t2 - word.t1
            s2mid = (s2end + s2start) / 2
            # xc - get mean consonant spectrum
            s1mean = get_spectrum(S, s1start, s1end, step_size=step_size)
            # xv - get mean vowel spectrum
            s2mean = get_spectrum(S, s2start, s2end, step_size=step_size)
            start_frame = int(s1mid / step_size)
            end_frame = int(s2mid / step_size)
            # Gerosa & Narayanan: fcv(i) = d(xc, xi) - d(xv, xi), where c and
            # v are consonant and vowel and xi is the i-th frame
            trajectory = []
            for i in np.arange(start_frame, end_frame):
                spec = np.log(S[:, i])              # xi - that frame's spectrum
                d1 = np.linalg.norm(s1mean - spec)  # d(xc, xi)
                d2 = np.linalg.norm(s2mean - spec)  # d(xv, xi)
                trajectory.append(d1 - d2)          # fcv(i)
            # may want to plot the trajectory here as a sanity check
            #if plot:
            #    plt.plot(trajectory)
            clist = []
            vlist = []
            for num in trajectory:
                if num < 0:
                    clist.append(num)
                else:
                    vlist.append(num)
            cmean = np.mean(clist)  # mean of fcv in the consonant portion
            vmean = np.mean(vlist)
            lowbound = cmean * .8   # magic number - the "fixed threshold"
            highbound = vmean * .8
            # assumption here may be false: all frames under threshold are
            # in the transition
            x = 0
            for num in trajectory:
                # only count values within the bounds toward the duration
                if lowbound < num < highbound:
                    x = x + 1
            transition_duration = x * step_size
            return transition_duration

        # loop through all of the (specified) labels on the "Phone" tier
        # of the current word
        for v in snippet_pm:
            if re.match(segments, v.text):
                if v.t2 != word.t2:
                    # don't analyze [a] word-finally; doing so breaks script
                    t1 = v.t1 - word.t1
                    t2 = v.t2 - word.t1
                    # index the following segment to eventually find the
                    # duration of the VC sequence
                    followingt2 = pm.tier('Phone').next(v).t2
                    print(followingt2)
                    spectrum = get_spectrum(S, t1, t2, step_size=step_size)
                    trans_dur = get_transition_duration(
                        S, v, pm.tier('Phone').next(v), word,
                        step_size=step_size)
                    speakerlist.append(wav.split("_", 1)[0])
                    agelist.append(wav.split("_", 2)[1])
                    filenamelist.append(wav)
                    phonelist.append(pm.tier('Phone').label_at(v.center).text)
                    vectorlist.append(spectrum)
                    followinglist.append(pm.tier('Phone').next(v).text)
                    prevlist.append(pm.tier('Phone').prev(v).text)
                    wordlist.append(pm.tier('Word').label_at(v.center).text)
                    t1list.append(v.t1)
                    t2list.append(v.t2)
                    notelist.append(pm.tier('Notes').label_at(v.center).text)
                    durlist.append(v.t2 - v.t1)
                    sequencedurlist.append(followingt2 - v.t1)
                    worddurlist.append(word.t2 - word.t1)
                    transdur_list.append(trans_dur)
    df = pd.DataFrame(
        OrderedDict((
            ('Speaker', pd.Series(speakerlist)),
            ('Age', pd.Series(agelist)),
            ('Filename', pd.Series(filenamelist)),
            ('Phone', pd.Series(phonelist)),
            ('Spectrum', pd.Series(vectorlist)),
            ('Previous', pd.Series(prevlist)),
            ('Following', pd.Series(followinglist)),
            ('Word', pd.Series(wordlist)),
            ('Note', pd.Series(notelist)),
            ('phone_t1', pd.Series(t1list)),
            ('phone_t2', pd.Series(t2list)),
            ('transition_duration', pd.Series(transdur_list)),
            ('sequence_duration', pd.Series(sequencedurlist)),
            ('Phone_duration', pd.Series(durlist)),
            ('Word_duration', pd.Series(worddurlist)))))
    df.to_csv(os.path.splitext(soundfile)[0] + '.trans_dur.csv',
              encoding='utf-8')
# adjust shape of array for pre-converted BPR images if desired
if args.convert:
    conv_frame = e.acquisitions[0].image_reader.get_frame(0)
    conv_img = test.image_converter.as_bmp(conv_frame)

# # # #
# Comb the experiment object for data, and output numpy arrays to run PCs on.
# # # #
for idx, a in enumerate(e.acquisitions):
    a.gather()
    print("Now working on {}".format(a.timestamp))
    # setup for PC and audio
    wav = a.abs_ch1_audio_file
    tg = os.path.splitext(wav)[0] + '.TextGrid'
    pm = audiolabel.LabelManager(from_file=tg, from_type="praat")
    # return last V = target V
    v, m = pm.tier('phone').search(vre, return_match=True)[-1]
    myword = pm.tier('word').label_at(v.center).text
    # get R or L timepoints from words, if present, rather than the vowels'
    # timepoints
    # TODO does not change match object returned above - problem?
    if (pm.tier('phone').next(v).text == "L") or \
            (pm.tier('phone').next(v).text == "R"):
        v = pm.tier('phone').next(v)
    if (pm.tier('phone').prev(v).text == "L") or \
            (pm.tier('phone').prev(v).text == "R"):
        v = pm.tier('phone').prev(v)
    # collect PC information.
    if frames is None:
        if args.convert:
            frames = np.empty([len(e.acquisitions)] + list(conv_img.shape))
        else:
for idx, a in enumerate(e.acquisitions):
    if frames is None:
        if args.convert:
            frames = np.empty([len(e.acquisitions)] + list(conv_img.shape))
        else:
            frames = np.empty([len(e.acquisitions)] +
                              list(a.image_reader.get_frame(0).shape)) * np.nan
    a.gather()
    tg = str(a.abs_image_file + ".ch1.TextGrid")
    fbfile = str(a.abs_image_file + ".ch1.fb")
    wav = str(a.abs_image_file + ".ch1.wav")
    pm = audiolabel.LabelManager(from_file=tg, from_type="praat")
    print(pm)
    # return last V = target V
    v, m = pm.tier('phone').search(vre, return_match=True)[-1]
    print(v)
    # print('here comes m')
    # print(m)
    #print(pm.tier('phone'))
    word = pm.tier('word').label_at(v.center).text
    print(word)
    # print(phone)
    # phase.append(a.runvars.phase)
    trial.append(idx)
    tstamp.append(a.timestamp)
    phone.append(v.text)
    utt, (timestamp + '.TextGrid')
)  # may need to change to just TextGrid depending on when data is from
wav = os.path.join(utt, (timestamp + '.wav'))
if not os.path.isfile(tg):
    tg = os.path.join(utt, (timestamp + '.bpr.ch1.TextGrid'))
if not os.path.isfile(wav):
    # may need to change to bpr.ch1.wav depending on when data is from
    wav = os.path.join(utt, (timestamp + '.bpr.ch1.wav'))
# print(wav)
# syncfile = os.path.join(utt,(timestamp+'.bpr.sync.txt'))
# if not syncfile:
#     continue
# get midpoint time
pm = audiolabel.LabelManager(from_file=tg, from_type='praat')
for lab in pm.tier('phone'):
    if re.match(TARG, lab.text):
        print(lab.text)
        label = lab.text
        t1 = lab.t1
        t2 = lab.t2
        mid_frmtime = (t1 + t2) / 2
        subjrframelist = re.compile(r'.*\.jpg')
        regex = re.compile(r'(pc[0-9])|(mean).jpg')
        stimex = re.compile(stimtext)
        bmpdir = os.path.join(subbmpdir, timestamp)
        imlist = [i for i in os.listdir(bmpdir)
                  if (subjrframelist.search(i) and not regex.search(i)
                      and not stimex.search(i))
# audiolabel: https://github.com/rsprouse/audiolabel
import argparse
import shlex
import subprocess

import audiolabel

# uses argparse to parse command line arguments of the form
# 'python alignstrip.py textgrid wav outdir'
parser = argparse.ArgumentParser()
parser.add_argument('textgrid')
parser.add_argument('wav')
parser.add_argument('outdir')
# stores parsed values of command line arguments in the variable args
args = parser.parse_args()

# create LabelManager from file and file type
data = audiolabel.LabelManager(from_file=args.textgrid, from_type="praat")
# stores the 'word' tier (rather than the syllable tier) from the audio file
tier = data.tier('word')
result = []

# make excerpts split by word
for i, label in enumerate(tier):
    # creates a new audio file for each word, numbered sequentially
    filename = '{}/excerpts/excerpt{}.wav'.format(args.outdir, i)
    # uses shlex to split the string for the command line
    # (to run to split the audio file)
    trimcommand = shlex.split('sox {} {} trim {} ={}'.format(
        args.wav, filename, label.t1, label.t2))
    # uses shlex to split the string for the command line
    # (to compute amplitude)
    ampcommand = shlex.split('sox {} -n stat'.format(filename))
    # uses subprocess to run trimcommand on the command line