def shortTermAnalyses(sound_type, filename, patient_name):
    """Return the first eight short-term feature rows (deltas enabled) of a recording.

    For ``sound_type == 'speech'`` two recordings are analysed: ``filename``
    and the file obtained by replacing its last four characters with
    ``"1.wav"``.  For each of them the silence-free segments are written to
    ``database/<patient_name>/`` and features are extracted separately from
    channel 0 and channel 1.  The four feature matrices are cropped to their
    common shape and averaged element-wise.

    For any other ``sound_type`` the features of the whole signal are returned.
    ``window.refresh()`` is called between the slow steps.
    """

    def _first_eight(samples, rate):
        # 0.05 * rate / 0.025 * rate: window and step expressed in samples.
        extracted = ShortTermFeatures.feature_extraction(
            samples, rate, 0.05 * rate, 0.025 * rate, deltas=True)
        return extracted[0][:8]

    def _speech_pair(rate, samples, segmented_path):
        # Drop silence, store the concatenated voiced segments, then
        # extract per-channel features from the *unsegmented* signal.
        voiced = audioSegmentation.silence_removal(
            samples, rate, 0.5, 0.1, weight=0.2)
        kept = np.concatenate([
            samples[int((seg[0] + 0.1) * rate):int((seg[1] + 0.1) * rate)]
            for seg in voiced
        ])
        wavfile.write(segmented_path, rate, kept)
        left = _first_eight(samples[:, 0], rate)
        window.refresh()
        right = _first_eight(samples[:, 1], rate)
        window.refresh()
        return left, right

    fs, signal = wavfile.read(filename)
    window.refresh()

    if sound_type != 'speech':
        return _first_eight(signal, fs)

    s1, s2 = _speech_pair(
        fs, signal,
        "database/{0}/speechFileSegmented.wav".format(patient_name))

    second_name = filename[:-4] + "1.wav"
    fs, signal = wavfile.read(second_name)
    s3, s4 = _speech_pair(
        fs, signal,
        "database/{0}/speechFileSegmented1.wav".format(patient_name))

    matrices = (s1, s2, s3, s4)
    n = min(mat.shape[0] for mat in matrices)
    m = min(mat.shape[1] for mat in matrices)
    return (s1[:n, :m] + s2[:n, :m] + s3[:n, :m] + s4[:n, :m]) / 4
def extract(x, sr=16000):
    """Compute a padded, z-scored feature sequence for one audio signal.

    The short-term features of ``x`` are stacked with the transposed output
    of ``ShortTermFeatures.speed_feature`` (harmonic ratio and pitch),
    z-scored along axis 0, transposed, and padded/truncated to
    ``globalvars.max_len`` with ``globalvars.masking_value``.

    :param x: audio samples
    :param sr: sampling rate used to convert frame size and step to samples
    :return: the array produced by ``sequence.pad_sequences`` for this
        single sequence
    """
    frame_len = globalvars.frame_size * sr
    hop_len = globalvars.step * sr

    # 34D short-term feature
    features = ShortTermFeatures.feature_extraction(x, sr, frame_len, hop_len)
    # pyAudioAnalysis releases supporting Python 3 return a tuple; keep
    # only its first element.
    if type(features) is tuple:
        features = features[0]

    # Harmonic ratio and pitch, 2D
    hr_pitch = ShortTermFeatures.speed_feature(
        x, int(sr), int(frame_len), int(hop_len))
    stacked = np.append(features, hr_pitch.transpose(), axis=0)

    # Z-normalized
    normalised = stats.zscore(stacked, axis=0).transpose()

    return sequence.pad_sequences(
        [normalised],
        maxlen=globalvars.max_len,
        dtype="float32",
        padding="post",
        value=globalvars.masking_value,
    )
def preprocess_audio(data_type):
    """Extract two-channel short-term features for every mp3 in a data split.

    Files are read from ``os.path.join(path, data_type)`` (``path`` is a
    module-level name).  Channel features are stacked along axis 0; when
    the signal cannot be indexed as two channels, the second half is zeros.

    :param data_type: sub-directory name of the split to process
    :return: ``(lens, data)`` where ``lens`` lists the number of frames per
        file and ``data`` maps the file name without its last four
        characters to ``{'feature': ..., 'seq_len': ...}``
    """
    files_dir = os.path.join(path, data_type)
    # Keep only the names whose text after the last dot is "mp3".
    mp3_files = [
        name for name in os.listdir(files_dir)
        if name.split(".")[-1] == "mp3"
    ]

    data = {}
    lens = []
    for file_name in mp3_files:
        Fs, x = audioBasicIO.read_audio_file(os.path.join(files_dir, file_name))
        win, hop = 0.050 * Fs, 0.025 * Fs
        try:
            F0, _ = ShortTermFeatures.feature_extraction(x[:, 0], Fs, win, hop)
            F1, _ = ShortTermFeatures.feature_extraction(x[:, 1], Fs, win, hop)
        except IndexError:
            # Signal is not indexable as two channels: use it as-is and
            # substitute zeros for the second channel.
            F0, _ = ShortTermFeatures.feature_extraction(x, Fs, win, hop)
            F1 = np.zeros(F0.shape)

        feature = np.concatenate([F0, F1], axis=0)
        seq_len = feature.shape[1]
        lens.append(seq_len)

        if seq_len < 611:
            # Zero-pad short sequences to 611 frames.
            padded = np.zeros((68, 611))
            padded[:, :seq_len] = feature
            # NOTE(review): transpose(0, 1) is the identity permutation for
            # a 2-D array, so the shape stays (68, 611) even though the
            # original comment said (611, 68) -- confirm the intended layout.
            feature = padded.transpose(0, 1)

        data[file_name[:-4]] = {'feature': feature, 'seq_len': seq_len}

    return lens, data
def generate_data(output_csv):
    """Extract summary features from the audio files and write them to a CSV.

    Reads ``f1.wav`` .. ``f821.wav`` (label 1) followed by ``m1.wav`` ..
    ``m821.wav`` (label 0) from the current working directory.  For each
    file the minimum, maximum, mean and standard deviation of each of the
    first 34 short-term feature rows are computed; the label is appended as
    the last column.

    :param output_csv: path of the CSV file to create; an existing file is
        overwritten, not appended to
    """

    def _rows_for(prefix, label):
        # One CSV row per "<prefix><i>.wav" file.
        rows = []
        for i in range(1, 822):
            print(prefix, i)
            Fs, x = audioBasicIO.read_audio_file(prefix + str(i) + ".wav")
            F, _ = ShortTermFeatures.feature_extraction(
                x, Fs, 0.05 * Fs, 0.025 * Fs)
            row = []
            for j in range(34):
                row.extend((min(F[j]), max(F[j]), mean(F[j]), stdev(F[j])))
            row.append(label)
            rows.append(row)
        return rows

    all_rows = _rows_for("f", 1) + _rows_for("m", 0)

    # newline="" lets the csv module control line endings; without it
    # extra blank rows appear on platforms that translate "\n".
    with open(output_csv, "w", newline="") as f:
        csv.writer(f).writerows(all_rows)
def _get_batches_of_transformed_samples(self, index_array):
    """Build one batch of augmented, padded feature sequences.

    For every index in ``index_array`` the raw signal ``self.x[j]`` is
    optionally augmented (white noise, shift, stretch, as enabled on
    ``self.audio_data_generator``), converted to short-term features stacked
    with harmonic ratio and pitch, z-scored along axis 0 and transposed.
    Sequences are then padded to ``globalvars.max_len``.

    :param index_array: indices into ``self.x`` (and ``self.y``)
    :return: ``[batch_u, batch_x]`` when ``self.y`` is ``None``, otherwise
        ``([batch_u, batch_x], batch_y)``; ``batch_u`` is a constant array
        filled with ``globalvars.attention_init_value``
    """
    generator = self.audio_data_generator
    frame_len = globalvars.frame_size * self.sr
    hop_len = globalvars.step * self.sr

    batch_x = []
    for j in index_array:
        x = self.x[j]

        # Augmentation
        if generator.white_noise_:
            x = generator.white_noise(x)
        if generator.shift_:
            x = generator.shift(x)
        if generator.stretch_:
            x = generator.stretch(x)

        # 34D short-term feature
        f = ShortTermFeatures.feature_extraction(
            x, self.sr, frame_len, hop_len)
        # pyAudioAnalysis releases supporting Python 3 return a tuple;
        # keep only its first element so np.append below receives an array.
        if isinstance(f, tuple):
            f = f[0]

        # Harmonic ratio and pitch, 2D
        hr_pitch = ShortTermFeatures.speed_feature(
            x, self.sr, frame_len, hop_len)
        x = np.append(f, hr_pitch.transpose(), axis=0)

        # Z-normalized
        x = stats.zscore(x, axis=0)
        batch_x.append(x.transpose())

    batch_x = sequence.pad_sequences(
        batch_x,
        maxlen=globalvars.max_len,
        dtype="float32",
        padding="post",
        value=globalvars.masking_value,
    )

    batch_u = np.full(
        (len(index_array), globalvars.nb_attention_param),
        globalvars.attention_init_value,
        dtype=np.float32,
    )

    if self.y is None:
        return [batch_u, batch_x]
    batch_y = self.y[index_array]
    return [batch_u, batch_x], batch_y
def feature_extraction(INPUTPATH, OUTPATH):
    """Extract per-channel short-term features and dump them as JSON.

    For a mono file ``<OUTPATH>channel1_features.json`` is written; for a
    two-channel file ``channel2_features.json`` is written as well.  Each
    file holds a DataFrame whose columns are the feature names.

    :param INPUTPATH: path of the audio file to analyse
    :param OUTPATH: prefix prepended to the output file names
    :return: a JSON string with keys ``channel1`` (and ``channel2``), or
        ``"Error: <message>"`` when anything fails, including an
        unsupported channel count
    """
    try:
        Fs, x = audioBasicIO.read_audio_file(INPUTPATH)

        # A signal without a second axis is treated as mono.
        try:
            CH = x.shape[1]
        except (AttributeError, IndexError):
            CH = 1

        if CH == 1:
            channels = [x]
        elif CH == 2:
            channels = [x[:, 0], x[:, 1]]
        else:
            # Previously this surfaced as an unbound-variable error.
            raise ValueError(
                "unsupported number of channels: {}".format(CH))

        # Extract everything first so no file is written if any channel fails.
        extracted = [
            ShortTermFeatures.feature_extraction(
                samples, Fs, 0.050 * Fs, 0.025 * Fs)
            for samples in channels
        ]

        result = {}
        for number, (values, names) in enumerate(extracted, start=1):
            frame = pd.DataFrame(
                {name: values[i] for i, name in enumerate(names)})
            frame.to_json(
                OUTPATH + "channel{}_features.json".format(number))
            result["channel{}".format(number)] = json.loads(frame.to_json())

        return json.dumps(result)
    except Exception as e:
        return "Error: " + str(e)
def generateFeaturesData(outputData):
    """Write chunk-level percentile features of the audio files to a CSV.

    Processes ``rej_1.wav`` .. ``rej_1500.wav`` (label 1) and then
    ``acc_1.wav`` .. ``acc_1500.wav`` (label 2) from the current working
    directory.  Files that cannot be read or analysed are skipped.  Each
    file's feature matrix is split into chunks of 399 frames; for every
    chunk the 25th, 50th, 75th and 95th percentiles of each of the first 34
    feature rows are stored, followed by the file's total frame count
    divided by 399 and the label.

    :param outputData: path of the CSV file to create (overwritten)
    """
    chunk = 399

    def _rows_for(prefix, tag, label):
        rows = []
        for i in range(1, 1501):
            print(tag, i)
            try:
                Fs, x = audioBasicIO.read_audio_file(
                    prefix + str(i) + ".wav")
                F, _ = ShortTermFeatures.feature_extraction(
                    x, Fs, 0.05 * Fs, 0.025 * Fs)
            except Exception:
                # Best effort: skip unreadable files, but let
                # KeyboardInterrupt / SystemExit propagate.
                continue

            total_frames = len(F[0])
            for start in range(0, total_frames, chunk):
                row = []
                for j in range(34):
                    window = F[j, start:start + chunk]
                    row.extend(
                        np.percentile(window, q) for q in (25, 50, 75, 95))
                row.append(total_frames / chunk)
                row.append(label)
                rows.append(row)
        return rows

    all_rows = _rows_for("rej_", "Rej", 1) + _rows_for("acc_", "Acc", 2)

    # newline="" lets the csv module control line endings.
    with open(outputData, "w", newline="") as f:
        csv.writer(f).writerows(all_rows)
def zcr_sigenergy(INPUTPATH, OUTPATH):
    """Plot the first two short-term feature rows of each channel to a PNG.

    One subplot per channel is drawn (one for mono, two stacked for stereo)
    and the figure is saved as ``<OUTPATH>zcr_energy.png``.

    :param INPUTPATH: path of the audio file to analyse
    :param OUTPATH: prefix prepended to the output file name
    :return: ``"Complete"`` on success, ``"Error: <message>"`` on failure,
        or ``None`` when the file has neither one nor two channels
    """
    try:
        Fs, x = audioBasicIO.read_audio_file(INPUTPATH)

        # A signal without a second axis is treated as mono.
        try:
            CH = x.shape[1]
        except (AttributeError, IndexError):
            CH = 1

        if CH == 1:
            channels = [x]
        elif CH == 2:
            channels = [x[:, 0], x[:, 1]]
        else:
            return None

        extracted = [
            ShortTermFeatures.feature_extraction(
                samples, Fs, 0.050 * Fs, 0.025 * Fs)
            for samples in channels
        ]

        fig = plt.figure(figsize=(18, 8), dpi=200)
        try:
            for position, (F, names) in enumerate(extracted, start=1):
                ax = fig.add_subplot(len(extracted), 1, position)
                ax.plot(F[0, :], label=names[0])
                ax.plot(F[1, :], label=names[1])
                ax.legend()
                ax.set_title('Channel {}'.format(position))

            # Set common labels
            fig.text(0.5, 0.01, 'Frame no.', ha='center', va='center')
            fig.text(0.004, 0.5, 'Zero Crossing Rate / Signal Energy',
                     ha='center', va='center', rotation='vertical')
            fig.tight_layout()
            fig.savefig(OUTPATH + 'zcr_energy.png')
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close(fig)
        return "Complete"
    except Exception as e:
        return "Error: " + str(e)
def test_shortTermFeatures(wav_file, plot):
    """Extract short-term features of a file and optionally plot them.

    When ``plot`` is truthy, feature rows 1 and 0 and the raw signal are
    drawn on three overlaid axes with a combined legend, and the figure is
    shown.

    :param wav_file: path of the audio file to analyse
    :param plot: whether to build and show the figure
    :return: the matplotlib figure, or ``None`` when ``plot`` is falsy
    """
    fs, data = audioBasicIO.read_audio_file(wav_file)
    print(f'FS={fs} win={0.050*fs} step={0.025*fs}')
    F, f = STF.feature_extraction_lengthwise(data, fs, 0.050 * fs, 0.025 * fs)

    # Bound up front so the return below is valid when nothing is plotted.
    fig = None
    if plot:
        fig = plt.figure(figsize=(12, 6))
        ax1 = fig.subplots()
        ax2 = ax1.twinx()
        ax3 = ax2.twinx()

        ax1.plot(F[1, :], color='red', label=f[1])
        ax2.plot(F[0, :], color='green', label=f[0])
        ax3.plot(data, color='blue', label='data', alpha=0.5)

        # Merge the three axes' legend entries into a single legend.
        lines1, labels1 = ax1.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        lines3, labels3 = ax3.get_legend_handles_labels()
        ax3.set_xlabel('time (s)')
        ax3.legend(lines1 + lines2 + lines3,
                   labels1 + labels2 + labels3, loc=0)

        ax1.axis('off')
        ax2.axis('off')
        plt.show()
    return fig
def exp1():
    """Print the short-term features of ``AfeExp.wav_file`` and plot its energy.

    Features are extracted with a 50 ms window and a 50 ms step, the
    feature names are listed on stdout, and the ``energy`` row is plotted
    against time with plotly (opened automatically).
    """
    fs, s = aIO.read_audio_file(AfeExp.wav_file)
    duration = len(s) / float(fs)
    print(f'duration = {duration} seconds')

    win, step = 0.050, 0.050
    f, fn = aSF.feature_extraction(s, fs, int(fs * win), int(fs * step))
    print(f'{f.shape[1]} frames, {f.shape[0]} short-term features')

    print('Feature names:')
    for i, nam in enumerate(fn):
        print(f'{i}:{nam}')

    time = np.arange(0, duration - step, win)
    energy = f[fn.index('energy'), :]
    mylayout = go.Layout(
        yaxis=dict(title="frame energy value"),
        xaxis=dict(title="time (sec)"),
    )
    figure_spec = {
        'data': [go.Scatter(x=time, y=energy)],
        'layout': mylayout,
    }
    plotly.offline.plot(figure_spec, auto_open=True)
def get_spectrogram(path, win, step, disable_caching=True, smooth=True):
    """
    get_spectrogram() is a wrapper to
    pyAudioAnalysis.ShortTermFeatures.spectrogram() with a caching
    functionality

    :param path: path of the WAV file to analyze
    :param win: short-term window to be used in spectrogram calculation
    :param step: short-term step to be used in spectrogram calculation
    :param disable_caching: when False, the (unsmoothed) spectrogram is
        loaded from / saved to ``<path>_<win>_<step>.npz``
    :param smooth: when True, a (2, 3) median filter is applied to the
        spectrogram values before returning
    :return: spectrogram matrix, time array, freq array and sampling freq
    """
    # The file is always read because the sampling frequency is returned.
    fs, s = io.read_audio_file(path)

    cache_name = path + "_{0:.6f}_{1:.6f}.npz".format(win, step)
    if not disable_caching and os.path.isfile(cache_name):
        print("Loading cached spectrogram")
        # NpzFile keeps the archive open; the context manager closes it.
        with np.load(cache_name) as npzfile:
            spec_val = npzfile["arr_0"]
            spec_time = npzfile["arr_1"]
            spec_freq = npzfile["arr_2"]
    else:
        print("Computing spectrogram")
        spec_val, spec_time, spec_freq = sF.spectrogram(
            s, fs, round(fs * win), round(fs * step), False, True)
        if not disable_caching:
            np.savez(cache_name, spec_val, spec_time, spec_freq)

    if smooth:
        spec_val = ndimage.median_filter(spec_val, (2, 3))

    return spec_val, np.array(spec_time), np.array(spec_freq), fs
def getTXT(file):
    """Convert a media file to WAV and dump its short-term features as text.

    The WAV copy is stored under ``store/audioStore/`` and the features
    under ``store/audioEvaluationTxt/``, one line per frame with the
    feature values separated by ``", "``.

    :param file: path of the input media file; its base name (without
        extension) names the outputs
    :return: path of the text file that was written
    :raises IndexError: if no base name can be parsed from ``file``
    """
    pattern = re.compile(r'([^<>/\\\|:""\*\?]+)\.\w+$')
    fileName = pattern.findall(file)[0]

    # mp4 to wav
    wav_path = 'store/audioStore/' + fileName + '.wav'
    AudioSegment.from_file(file).export(wav_path, format='wav')

    # wav to txt
    Fs, x = loadAudio(wav_path)
    print(Fs, x)
    st_features, _ = sF.feature_extraction(
        x, Fs, 0.050 * Fs, 0.025 * Fs, deltas=False)

    outPath = 'store/audioEvaluationTxt/' + fileName + '.txt'
    n_features, n_frames = st_features.shape[0], st_features.shape[1]
    # The context manager closes the file even if writing fails.
    with open(outPath, 'w') as outputFile:
        for col in range(n_frames):
            sampleFeature = [st_features[row][col] for row in range(n_features)]
            sampleString = str(sampleFeature).replace('[', '').replace(']', '')
            outputFile.write(sampleString + '\n')

    return outPath
def fileChromagramWrapper(wav_file):
    """Compute the chromagram of an audio file with plotting enabled.

    The signal is converted to mono and analysed with a 40 ms window and
    step.  The results are not returned.

    :param wav_file: path of the audio file
    :raises Exception: if ``wav_file`` does not exist
    """
    if not os.path.isfile(wav_file):
        raise Exception("Input audio file not found!")

    fs, stereo = audioBasicIO.read_audio_file(wav_file)
    mono = audioBasicIO.stereo_to_mono(stereo)
    frame = round(fs * 0.040)
    # The three return values are unpacked (as before) but unused.
    _specgram, _time_axis, _freq_axis = sF.chromagram(
        mono, fs, frame, round(fs * 0.040), True)
def get_features(input_file):
    """Return the percentile feature row of a single .wav file.

    Unlike the training-data generator, the file is not broken into chunks
    and no target label is appended; the caller is expected to score the
    row (reject is 1, accept is 2).

    :param input_file: path of the .wav file
    :return: a list containing one list: the 25th, 50th, 75th and 95th
        percentiles of each of the first 34 feature rows, followed by the
        number of frames divided by 399
    """
    # NOTE(review): the result of this read is never used -- confirm
    # whether it is kept on purpose (e.g. as a validity check).
    sf.read(input_file)

    Fs, x = audioBasicIO.read_audio_file(input_file)
    F, _ = ShortTermFeatures.feature_extraction(x, Fs, 0.05 * Fs, 0.025 * Fs)

    row = []
    for j in range(34):
        for q in (25, 50, 75, 95):
            row.append(np.percentile(F[j, :], q))
    row.append(len(F[33]) / 399)

    return [row]
def ExtractSpec(id):
    """Return the short-term feature matrix of ``<id>_AUDIO.wav``.

    The file is read from a hard-coded directory; features use a 50 ms
    window and a 25 ms step.

    :param id: identifier substituted into the file name
    :return: the feature matrix (feature names are discarded)
    """
    wav_path = (
        "/Volumes/Macintosh HD - Data/Users/admin/Documents/HD Drive/DataProjects/DepressionData/audio/{}_AUDIO.wav"
        .format(id))
    Fs, x = audioBasicIO.read_audio_file(wav_path)
    features, _names = ShortTermFeatures.feature_extraction(
        x, Fs, 0.050 * Fs, 0.025 * Fs)
    return features
def ExtractSpec(id):
    """Unzip ``<id>_AUDIO.wav`` and return its spectrogram.

    The member is extracted from ``/Users/aravind/Downloads/<id>_P.zip``
    into the ``audio`` directory and analysed with a 50 ms window and a
    25 ms step.

    :param id: identifier substituted into the archive and member names
    :return: the first of the three values returned by
        ``ShortTermFeatures.spectrogram``
    """
    archive_path = "/Users/aravind/Downloads/{}_P.zip".format(id)
    with ZipFile(archive_path, 'r') as archive:
        archive.extract("{}_AUDIO.wav".format(id), 'audio')

    Fs, x = audioBasicIO.read_audio_file("audio/{}_AUDIO.wav".format(id))
    spec, _second, _third = ShortTermFeatures.spectrogram(
        x, Fs, 0.050 * Fs, 0.025 * Fs)
    return spec
def test_feature_extraction_short():
    """Check short-term feature extraction on the 1-second test file.

    With a 50 ms window and a 50 ms step the file must yield 20 frames,
    and the number of feature rows must match the number of feature names.
    """
    [fs, x] = audioBasicIO.read_audio_file("test_data/1_sec_wav.wav")
    F, f_names = ShortTermFeatures.feature_extraction(
        x, fs, 0.050 * fs, 0.050 * fs)
    # These are short-term frames; the message previously said "mid-term".
    assert F.shape[1] == 20, "Wrong number of short-term windows"
    assert F.shape[0] == len(f_names), "Number of features and feature " \
                                       "names are not the same"
def extract_feature(file_name):
    """Return the transposed short-term feature matrix of an audio file.

    Only the first channel of a two-dimensional signal is used.  Features
    are computed with a 25 ms window and a 10 ms step.

    :param file_name: path of the audio file
    :return: ``F.T``, i.e. one row per frame
    """
    Fs, samples = audioBasicIO.read_audio_file(file_name)
    first_channel = samples[:, 0] if samples.ndim == 2 else samples
    features, _names = ShortTermFeatures.feature_extraction(
        first_channel, Fs, 0.025 * Fs, 0.010 * Fs)
    return features.T
def extract_extract_audioAnalysis(audio_file, chuncksize=1):
    """Return six short-term feature rows of a mono-converted audio file.

    The window is ``Fs`` samples (one second) and the step is
    ``chuncksize * Fs`` samples.

    :param audio_file: path of the audio file
    :param chuncksize: step between windows, in seconds
    :return: tuple of feature rows 0, 3, 4, 5, 6 and 7 -- per the original
        author: Zero Crossing Rate, Spectral Centroid, Spectral Spread,
        Spectral Entropy, Spectral Flux, Spectral Rolloff
    """
    Fs, samples = audioBasicIO.read_audio_file(audio_file)
    mono = audioBasicIO.stereo_to_mono(samples)
    step = chuncksize * Fs
    # Original author's note: takes approx. 2.5 mins to complete.
    features, _names = ShortTermFeatures.feature_extraction(mono, Fs, Fs, step)
    return tuple(features[row] for row in (0, 3, 4, 5, 6, 7))
def beatExtractionWrapper(wav_file, plot):
    """Estimate and print the beat rate of an audio file.

    Short-term features (50 ms window and step) are passed to
    ``aF.beat_extraction``; the resulting bpm and ratio are printed.

    :param wav_file: path of the audio file
    :param plot: forwarded to ``aF.beat_extraction``
    :raises Exception: if ``wav_file`` does not exist
    """
    if not os.path.isfile(wav_file):
        raise Exception("Input audio file not found!")

    fs, samples = audioBasicIO.read_audio_file(wav_file)
    frame = 0.050 * fs
    features, _names = sF.feature_extraction(samples, fs, frame, 0.050 * fs)
    bpm, ratio = aF.beat_extraction(features, 0.050, plot)

    print("Beat: {0:d} bpm ".format(int(bpm)))
    print("Ratio: {0:.2f} ".format(ratio))
def extract_dataset(data, nb_samples, dataset, save=True):
    """Extract padded, z-scored feature sequences for a whole data set.

    For each ``(signal, sampling_rate)`` pair the short-term features are
    stacked with harmonic ratio and pitch, z-scored along axis 0 and
    transposed.  All sequences are padded to ``globalvars.max_len``.

    :param data: iterable of ``(x, Fs)`` pairs
    :param nb_samples: total number of samples, used only in the progress
        message
    :param dataset: prefix of the pickle file written when ``save`` is true
    :param save: whether to pickle the result to ``<dataset>_features.p``
    :return: the padded feature array
    """
    f_global = []
    for count, (x, Fs) in enumerate(data, start=1):
        frame_len = globalvars.frame_size * Fs
        hop_len = globalvars.step * Fs

        # 34D short-term feature
        f = ShortTermFeatures.feature_extraction(x, Fs, frame_len, hop_len)
        # pyAudioAnalysis releases supporting Python 3 return a tuple;
        # keep only its first element.
        if isinstance(f, tuple):
            f = f[0]

        # Harmonic ratio and pitch, 2D
        hr_pitch = ShortTermFeatures.speed_feature(x, Fs, frame_len, hop_len)
        f = np.append(f, hr_pitch.transpose(), axis=0)

        # Z-normalized
        f = stats.zscore(f, axis=0)
        f_global.append(f.transpose())

        # Move the cursor up one line so the progress message overwrites
        # the previous one.
        sys.stdout.write("\033[F")
        print("Extracting features " + str(count) + "/" + str(nb_samples)
              + " from data set...")

    f_global = sequence.pad_sequences(
        f_global,
        maxlen=globalvars.max_len,
        dtype="float32",
        padding="post",
        value=globalvars.masking_value,
    )

    if save:
        print("Saving features to file...")
        # The context manager guarantees the pickle is flushed and closed.
        with open(dataset + "_features.p", "wb") as out_file:
            pickle.dump(f_global, out_file)

    return f_global
def pyaudioextraction(path, fs_factor, overlap_factor, stereo=False):
    """Return the flattened short-term features of an audio file.

    :param path: path of the audio file
    :param fs_factor: window length as a fraction of the sampling rate
    :param overlap_factor: step as a fraction of the sampling rate
    :param stereo: when true, the signal is converted to mono first
    :return: ``F.T.flatten()`` (frame-major order), or ``None`` if reading
        or feature extraction fails
    """
    try:
        Fs, x = audioBasicIO.read_audio_file(path)
        if stereo:
            x = audioBasicIO.stereo_to_mono(x)
        F, _ = ShortTermFeatures.feature_extraction(
            x, Fs, fs_factor * Fs, overlap_factor * Fs)
        return F.T.flatten()
    except Exception:
        # Best effort by design, but do not swallow KeyboardInterrupt or
        # SystemExit as a bare ``except`` would.
        return None
def function(row, column):
    """Download one episode, extract features and record their statistics.

    The URL is taken from ``df["Episode <column>"][row]``.  The audio is
    downloaded as mp3, converted to wav, read, and both temporary files are
    deleted.  The variance and mean of every short-term feature row are
    stored in the module-level ``var_list`` / ``mean_list`` at
    ``row - offset``.  Every second call both lists are written to
    ``./vars<offset>.csv`` and ``./means<offset>.csv``.

    Nothing is processed when the URL is ``None`` or ``row`` is 11861; the
    module-level ``interval`` counter is incremented regardless.

    :param row: row index into ``df``
    :param column: episode number used to select the column
    """
    global interval
    interval += 1

    url = df["Episode {}".format(column)][row]
    if url is None:
        return
    if row == 11861:
        return

    mp3 = '{}{}.mp3'.format(row, column)
    wav = '{}{}.wav'.format(row, column)

    r = requests.get(url, allow_redirects=True)
    # Close the handle explicitly so the data is flushed before pydub
    # reads the file back.
    with open(mp3, 'wb') as mp3_file:
        mp3_file.write(r.content)

    # Export mp3 to wav and remove mp3
    sound = AudioSegment.from_mp3(mp3)
    sound.export(wav, format="wav")
    os.remove(mp3)

    # Read wav info and remove it
    Fs, x = audioBasicIO.read_audio_file(wav)
    if len(x.shape) == 2:
        x = np.mean(x, axis=1)
    os.remove(wav)

    # Extract features
    print("Start {}{} at {}".format(row, column, datetime.datetime.now().time()))
    one_minute = Fs * 60
    if len(x) > 6 * one_minute:
        # Only the sixth minute is analysed for long recordings.
        # NOTE(review): shorter clips are analysed in full -- confirm
        # this is intended.
        x = x[5 * one_minute:6 * one_minute]
    F, _ = ShortTermFeatures.feature_extraction(x, Fs, 0.050 * Fs, 0.025 * Fs)

    var_list[row - offset] = [f.var() for f in F]
    mean_list[row - offset] = [f.mean() for f in F]
    print("End {}{} at {}".format(row, column, datetime.datetime.now().time()))

    if interval % 2 == 0:
        pd.DataFrame(var_list, columns=column_names).to_csv(
            r'./vars{}.csv'.format(offset), index=False, header=True)
        pd.DataFrame(mean_list, columns=column_names).to_csv(
            r'./means{}.csv'.format(offset), index=False, header=True)
def extract_feature(file_name):
    """Return per-feature mean and standard deviation of an audio file.

    Only the first channel of a two-dimensional signal is used.
    Short-term features are computed with a 25 ms window and a 10 ms step
    and then summarised over all frames.

    :param file_name: path of the audio file
    :return: 1-D array holding the per-feature means followed by the
        per-feature standard deviations
    """
    Fs, x = audioBasicIO.read_audio_file(file_name)
    if x.ndim == 2:
        x = x[:, 0]
    F, _ = ShortTermFeatures.feature_extraction(
        x, Fs, 0.025 * Fs, 0.010 * Fs)

    F_mean = F.mean(axis=1)
    # Previously computed with .mean(), which duplicated F_mean.
    F_std = F.std(axis=1)
    hfs = np.hstack([F_mean, F_std])
    return hfs.T
def AnalyzeData(data):
    """Extract short-term features (no deltas) from an audio buffer.

    The window is ``SAMPLE_DUR * RATE`` samples and the step is half of
    that; ``RATE`` and ``SAMPLE_DUR`` are module-level names.

    :param data: audio samples
    :return: ``(F, f_names)`` -- the feature matrix and the feature names
    """
    window = SAMPLE_DUR * RATE
    step = (SAMPLE_DUR / 2) * RATE
    features, names = ShortTermFeatures.feature_extraction(
        data, RATE, window, step, deltas=False)
    return features, names
def preProcess(fileName):
    """Read an audio file and return its short-term features.

    A two-channel signal is averaged across channels; anything else is
    reshaped to a single column.  Features use a 50 ms window and a 25 ms
    step.

    :param fileName: path of the audio file
    :return: ``(f_names, F)`` -- feature names first, then the matrix
    """
    Fs, samples = audioBasicIO.read_audio_file(fileName)

    is_two_channel = len(samples.shape) > 1 and samples.shape[1] == 2
    if is_two_channel:
        column = np.mean(samples, axis=1, keepdims=True)
    else:
        column = samples.reshape(samples.shape[0], 1)

    features, names = ShortTermFeatures.feature_extraction(
        column[:, 0], Fs, 0.050 * Fs, 0.025 * Fs)
    return (names, features)
def get_spectrogram_buffer(s, fs, win, step, smooth=True):
    """
    get_spectrogram_buffer() computes a spectrogram like get_spectrogram()
    does, but takes an in-memory audio buffer instead of a file path and
    performs no caching

    :param s: audio samples
    :param fs: sampling frequency
    :param win: short-term window, multiplied by fs to obtain samples
    :param step: short-term step, multiplied by fs to obtain samples
    :param smooth: when True, apply a (2, 3) median filter to the values
    :return: spectrogram matrix, time array, freq array and sampling freq
    """
    window_samples = round(fs * win)
    step_samples = round(fs * step)
    values, times, freqs = sF.spectrogram(
        s, fs, window_samples, step_samples, False, True)
    if smooth:
        values = ndimage.median_filter(values, (2, 3))
    return values, np.array(times), np.array(freqs), fs
def generate_CompareGraph():
    """Plot every short-term feature of a hard-coded audio file as a line chart.

    The file is converted to mono, features are extracted with a 0.5 s
    window and a 0.25 s step, and the transposed matrix (one column per
    feature) is shown with ``px.line``.
    """
    Fs, samples = audio.audioBasicIO.read_audio_file(
        "/Users/zhouhan/Downloads/河图 - 风起天阑.mp3")
    # Merge to a single (mono) channel first.
    mono = aio.stereo_to_mono(samples)
    # The feature matrix has one row per feature.
    features, _ = short.feature_extraction(mono, Fs, 0.50 * Fs, 0.25 * Fs)
    frame = pd.DataFrame(np.transpose(features))
    frame.head()
    px.line(frame).show()
def FindAudioShots(framechange_array, audio_path):
    """Score each video shot by the audio outliers that start inside it.

    Short-term features are extracted with window = step = ``Fs // 30``
    samples (presumably one analysis frame per video frame at 30 fps --
    confirm).  Frames whose feature row 1 deviates from its average by more
    than 3.5 standard deviations are marked; each run of marked frames
    becomes an event ``[start, end, value_at_start]``.  For every pair of
    consecutive entries in ``framechange_array`` the values of the events
    starting inside that interval are summed and divided by the interval
    length.  The scores are min-max scaled to [0, 1] and rounded.

    :param framechange_array: increasing frame indices delimiting shots
    :param audio_path: path of the audio file
    :return: list of scores, one per shot, rounded to three decimals
    """
    features = [1]
    Fs, samples = audioBasicIO.read_audio_file(audio_path)
    samples = audioBasicIO.stereo_to_mono(samples)
    frame_size = (Fs // 30)
    F, _ = ShortTermFeatures.feature_extraction(
        samples, Fs, frame_size, frame_size, deltas=False)

    astd = [np.std(F[idx, :]) for idx in features]
    aave = [np.average(F[idx, :]) for idx in features]

    # Keep the feature value for outlier frames, zero elsewhere.
    n_frames = len(F[features[0], :])
    which_shots = np.zeros(n_frames).flatten()
    for i in range(n_frames):
        for j, idx in enumerate(features):
            value = F[idx, :][i]
            if abs(value - aave[j]) > astd[j] * 3.5:
                which_shots[i] += value

    # Turn runs of positive values into [start, end, value_at_start];
    # a run still open at the last frame is not recorded.
    audioshotchange_list = []
    prev_val = 0.0
    last_start = 0
    for i in range(len(F[1, :])):
        current = which_shots[i]
        if prev_val == 0.0 and current > 0.0:
            last_start = i
        if prev_val > 0.0 and current == 0.0:
            audioshotchange_list.append(
                [last_start, i, which_shots[last_start]])
        prev_val = current

    n_shots = len(framechange_array) - 1
    audio_array = np.zeros(n_shots)
    for shot in range(n_shots):
        first_frame = framechange_array[shot]
        last_frame = framechange_array[shot + 1]
        for event in audioshotchange_list:
            if first_frame <= event[0] < last_frame:
                audio_array[shot] += event[2]
        audio_array[shot] /= (last_frame - first_frame)

    audio_array = preprocessing.minmax_scale(audio_array, feature_range=(0, 1))
    return [round(num, 3) for num in audio_array]
def get_short_features(self, file_path):
    """Return the short-term features of a wav file as a DataFrame.

    Features use a 50 ms window and a 25 ms step.  The frame has one row
    per analysis frame plus an ``id`` column holding the file's base name
    (the text after the last ``/``).

    :param file_path: path of the wav file, read with ``self.read_wav``
    :return: the feature DataFrame, or an empty DataFrame if the file
        cannot be read
    """
    try:
        Fs, x = self.read_wav(file_path)
    except Exception:
        # Best effort: report and skip unreadable files, but let
        # KeyboardInterrupt / SystemExit propagate.
        print('fail to extract short ' + file_path + ',passed.')
        return pd.DataFrame()

    F_s, _ = ShortTermFeatures.feature_extraction(
        x, Fs, 0.05 * Fs, 0.025 * Fs)

    short = pd.DataFrame(F_s.T)
    short['id'] = file_path.split('/')[-1]
    return short