def plot(data, title=None, x_label='Time (s)', y_label='Amplitude',
         size=(10, 6), caption=None, fig_name=None, show=False, close=True):
    if size:
        plt.figure(figsize=size, frameon=True)

    plt.plot(range(len(data)), data)
    plt.ylabel(y_label)
    plt.xlabel(x_label)
    plt.title(title)

    # Remove the margin on the x axis
    plt.margins(x=0)

    if caption:
        plt.figtext(0.5, 0.01, caption, wrap=True, horizontalalignment='center')

    if fig_name:
        Directory.create_directory(fig_name, True)
        plt.savefig(fig_name, transparent=False)

    if show:
        plt.show()

    if close:
        plt.close()
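# Usage sketch (not part of the original module; assumes the deep_audio
# helpers above are importable): plot one second of a synthetic 440 Hz tone.
# The figure path is hypothetical.
import numpy as np

tone = np.sin(2 * np.pi * 440 * np.arange(24000) / 24000)
plot(tone, title='440 Hz sine', fig_name='figures/sine_wave.png')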
def write(path, data, rate):
    import soundfile as sf
    from deep_audio import Directory

    Directory.create_directory(path, file=True)

    sf.write(path, data, rate, subtype='PCM_16')
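# Usage sketch (assumption, not in the original source): render a short tone
# and save it as 16-bit PCM next to the write() helper above. The output
# path is hypothetical; soundfile converts the float samples to PCM_16.
import numpy as np

rate = 24000
tone = 0.5 * np.sin(2 * np.pi * 440 * np.arange(rate) / rate)
write('examples/tone_440hz.wav', tone.astype('float32'), rate)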
def plot_subplots(audio, mfccs, lpccs, rate, title=None, size_multiplier=2,
                  cmap='magma', caption=None, fig_name=None, show=False):
    from matplotlib import rcParams, rcParamsDefault

    small_size = 8
    medium_size = 10
    bigger_size = 12
    image_size = (10 * size_multiplier, 6 * size_multiplier)

    # Controls default text sizes
    plt.rc('font', size=small_size * size_multiplier)
    # Fontsize of the axes title
    plt.rc('axes', titlesize=small_size * size_multiplier)
    # Fontsize of the x and y labels
    plt.rc('axes', labelsize=medium_size * size_multiplier)
    # Fontsize of the tick labels
    plt.rc('xtick', labelsize=small_size * size_multiplier)
    plt.rc('ytick', labelsize=small_size * size_multiplier)
    # Legend fontsize
    plt.rc('legend', fontsize=small_size * size_multiplier)
    # Fontsize of the figure title
    plt.rc('figure', titlesize=bigger_size * size_multiplier)

    plt.subplots(2, 2, figsize=image_size)
    plt.subplots_adjust(left=0.125, right=0.9, bottom=0.1, top=0.9,
                        wspace=0.3, hspace=0.3)
    plt.suptitle(title)

    plt.subplot(2, 2, 1)
    plot_audio(audio, rate, close=False, size=None)

    plt.subplot(2, 2, 2)
    plot_spectrogram(audio, rate, cmap=cmap, close=False, size=None)

    plt.subplot(2, 2, 3)
    plot_cepstrals(data=lpccs, y_label='LPCC Index', cmap=cmap,
                   size=None, close=False)

    plt.subplot(2, 2, 4)
    plot_cepstrals(data=mfccs, y_label='MFCC Index', cmap=cmap,
                   size=None, close=False)

    if caption:
        plt.figtext(0.5, 0.01, caption, wrap=True, horizontalalignment='center')

    if fig_name:
        Directory.create_directory(fig_name, True)
        plt.savefig(fig_name, transparent=False)

    if show:
        plt.show()

    plt.close()

    # Reset every style configured at the beginning of the function
    rcParams.update(rcParamsDefault)
def plot_cepstrals(data, title=None, x_label='Frame Index', y_label='Index',
                   cmap='magma', size=(10, 6), caption=None, fig_name=None,
                   show=False, close=True):
    if size:
        plt.figure(figsize=size, frameon=True)

    plt.imshow(data.T, origin='lower', aspect='auto',
               cmap=cmap, interpolation='nearest')
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    # plt.colorbar(format='%+2.0f')
    # plt.clim(vmin, vmax)

    if caption:
        plt.figtext(0.5, 0.01, caption, wrap=True, horizontalalignment='center')

    if fig_name:
        Directory.create_directory(fig_name, True)
        plt.savefig(fig_name, transparent=False)

    if show:
        plt.show()

    if close:
        plt.close()
def create_json_file(file, data, indent=2, cls=None):
    from deep_audio import Directory
    import json

    directory = '/'.join(file.split('/')[:-1])

    Directory.create_directory(directory)

    with open(file, "w") as fp:
        json.dump(data, fp, indent=indent, cls=cls)
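# Usage sketch (hypothetical path and payload): persist feature metadata as
# pretty-printed JSON; the parent directory is created on demand.
create_json_file('processed/portuguese/info.json',
                 {'sampling_rate': 24000, 'segments': 5})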
def plot_spectrogram(data, rate, n_fft=1024, title=None, x_label='Time (s)',
                     y_label='Frequency (kHz)', cmap='magma', size=(10, 6),
                     caption=None, fig_name=None, show=False, close=True):
    if size:
        plt.figure(figsize=size, frameon=True)

    plt.specgram(data, NFFT=n_fft, Fs=rate, cmap=cmap)
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel(x_label)

    if caption:
        plt.figtext(0.5, 0.01, caption, wrap=True, horizontalalignment='center')

    if fig_name:
        Directory.create_directory(fig_name, True)
        plt.savefig(fig_name, transparent=False)

    if show:
        plt.show()

    if close:
        plt.close()
def object_to_attention(filename, attrs, files):
    from deep_audio import Directory

    data = {
        'labels': [],
        'attrs': [],
        'mapping': [file.replace('.wav', '') for file in files]
    }

    for i in attrs:
        data['attrs'].extend(i['attrs'])
        data['labels'].extend(i['labels'])

    rows = []

    for i, label in enumerate(data['labels']):
        # The label doubles as the query id
        row = f'{label} qid:{label} '

        info_attrs = flatten_matrix(data['attrs'][i])

        for j, value in enumerate(info_attrs):
            row += f'{j}:{value} '

        rows.append(row)

    Directory.create_file(filename, rows)

    del data
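# For reference, each row written by object_to_attention() follows an
# SVMlight/ranking-style text format (sketch with made-up numbers):
#
#     3 qid:3 0:0.127 1:-0.843 2:0.051 ...
#
# i.e. the speaker label doubles as the query id and every flattened
# coefficient becomes a zero-indexed feature:value pair.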
def selection(folder, valid_size=0.25, test_size=0.2, random_state=42,
              flat=False, squeeze=False, mapping=False):
    from deep_audio import Directory
    from sklearn.model_selection import train_test_split
    from numpy import squeeze as np_squeeze

    X, y, labels = Directory.load_json_data(folder)

    if flat:
        X = flatten_matrix(X)

    if squeeze:
        X = np_squeeze(X, axis=3)

    if test_size == 0:
        if mapping:
            return X, y, labels
        return X, y

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, test_size=test_size, random_state=random_state)

    if valid_size == 0:
        return X_train, X_test, y_train, y_test

    X_train, X_valid, y_train, y_valid = train_test_split(
        X_train, y_train, stratify=y_train, test_size=valid_size,
        random_state=random_state)

    return X_train, X_valid, X_test, y_train, y_valid, y_test
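# Usage sketch (the dataset path is hypothetical): a stratified 60/20/20
# split, since valid_size is applied to the training portion that remains
# after the test split (0.25 of the remaining 0.8).
X_train, X_valid, X_test, y_train, y_valid, y_test = selection(
    'processed/portuguese/psf/24000', valid_size=0.25, test_size=0.2)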
import matplotlib.pyplot as plt
from numpy import lib, max

from deep_audio import Directory, JSON, Process, Terminal

args = Terminal.get_args()

# %%
model_algo = 'perceptron'

language = args['language'] or 'portuguese'
library = args['representation'] or 'psf'
n_people = args['people'] or None
n_segments = args['segments'] or None
n_rate = 24000
random_state = 42

filename_ps = Directory.verify_people_segments(people=n_people,
                                               segments=n_segments)

# %%
global X_train, X_valid, X_test, y_train, y_valid, y_test

DATASET_PATH = Directory.processed_filename(language, library, n_rate,
                                            n_people, n_segments)

# %%
# Data split
X_train, X_valid, X_test, y_train, y_valid, y_test = Process.selection(
    DATASET_PATH)

mapping = set(y_train)

# %%
language = args['language']
method = args['method']
library = args['representation']
people = args['people']
segments = args['segments']
normalization = args['normalization']
flat = args['flat']
augment = args['augmentation']
sampling_rate = 24000
random_state = 42
epochs = 2000
batch_size = 128

# %%
file_path = Directory.processed_filename(language, library, sampling_rate,
                                         people, segments, augment)

# %%
X_train, X_valid, X_test, y_train, y_valid, y_test = Process.selection(
    file_path, flat=flat)

param_grid = {}

# %%
if normalization == 'minmax':
    from sklearn.preprocessing import MinMaxScaler

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(
        X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
    X_test = scaler.transform(
        X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)
from deep_audio import Directory, Process, Terminal, Model

# %%
args = Terminal.get_args()

language = args['language']
library = args['representation']
people = args['people']
segments = args['segments']
normalization = args['normalization']
augment = args['augmentation']
sampling_rate = 24000
random_state = 42

# %%
global X_train, X_valid, X_test, y_train, y_valid, y_test

file_path = Directory.processed_filename(
    language, library, sampling_rate, people, segments, augment)

# %%
X_train, X_valid, X_test, y_train, y_valid, y_test = Process.selection(
    file_path, flat=True)

if normalization == 'minmax':
    from sklearn.preprocessing import MinMaxScaler

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(
        X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
    X_test = scaler.transform(
        X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)
elif normalization == 'standard':
args = Terminal.get_args()

language = 'portuguese'
method = 'svm'
library = 'psf'
people = args['people']
segments = args['segments']
normalization = args['normalization']
augment = args['augmentation']
sampling_rate = 24000
random_state = 42

filename_holder = Directory.model_filename(method=method, language=language,
                                           library=library,
                                           normalization=normalization,
                                           augmentation=augment,
                                           json=False, models=True)

info = json.load(open(filename_holder + 'info.json', 'r'))
scaler = load(open(filename_holder + 'scaler.pkl', 'rb'))
model = load(open(filename_holder + 'model.h5', 'rb'))

signal, rate = librosa.load(args['inferencia'], sr=sampling_rate)

# signal = Audio.trim(signal)

segment_time = 5

# Truncate the signal to a whole number of segment_time-second segments
signal = signal[:len(signal) - len(signal) % (rate * segment_time)]
def mixed_selection(first_folder, second_folder, third_folder, fourth_folder,
                    lm_validation=False, lm_test=False, rm_validation=False,
                    rm_test=False, valid_size=0.25, test_size=0.2,
                    random_state=42):
    global X_train, y_train, X_valid, y_valid, X_test, y_test
    from deep_audio import Directory
    from sklearn.model_selection import train_test_split
    from numpy import concatenate
    import numpy as np

    X_first, y_first, _ = Directory.load_json_data(first_folder)
    X_second, y_second, _ = Directory.load_json_data(second_folder)
    X_third, y_third, _ = Directory.load_json_data(third_folder)
    X_fourth, y_fourth, _ = Directory.load_json_data(fourth_folder)

    X_first = flatten_matrix(X_first)
    X_second = flatten_matrix(X_second)
    X_third = flatten_matrix(X_third)
    X_fourth = flatten_matrix(X_fourth)

    X_train_first, X_test_first, y_train_first, y_test_first = train_test_split(
        X_first, y_first, stratify=y_first, test_size=test_size,
        random_state=random_state)
    X_train_first, X_valid_first, y_train_first, y_valid_first = train_test_split(
        X_train_first, y_train_first, stratify=y_train_first,
        test_size=valid_size, random_state=random_state)

    X_train_second, X_test_second, y_train_second, y_test_second = train_test_split(
        X_second, y_second, stratify=y_second, test_size=test_size,
        random_state=random_state)
    X_train_second, X_valid_second, y_train_second, y_valid_second = train_test_split(
        X_train_second, y_train_second, stratify=y_train_second,
        test_size=valid_size, random_state=random_state)

    # Concatenate the two representations of the first language feature-wise
    X_train_first = concatenate((X_train_first, X_train_second), axis=1)

    if rm_validation:
        X_valid_first = concatenate((X_valid_first, X_valid_second), axis=1)

    if rm_test:
        X_test_first = concatenate((X_test_first, X_test_second), axis=1)

    X_train_third, X_test_third, y_train_third, y_test_third = train_test_split(
        X_third, y_third, stratify=y_third, test_size=test_size,
        random_state=random_state)
    X_train_third, X_valid_third, y_train_third, y_valid_third = train_test_split(
        X_train_third, y_train_third, stratify=y_train_third,
        test_size=valid_size, random_state=random_state)

    X_train_fourth, X_test_fourth, y_train_fourth, y_test_fourth = train_test_split(
        X_fourth, y_fourth, stratify=y_fourth, test_size=test_size,
        random_state=random_state)
    X_train_fourth, X_valid_fourth, y_train_fourth, y_valid_fourth = train_test_split(
        X_train_fourth, y_train_fourth, stratify=y_train_fourth,
        test_size=valid_size, random_state=random_state)

    # Concatenate the two representations of the second language feature-wise
    X_train_third = concatenate((X_train_third, X_train_fourth), axis=1)

    if rm_validation:
        X_valid_third = concatenate((X_valid_third, X_valid_fourth), axis=1)

    if rm_test:
        X_test_third = concatenate((X_test_third, X_test_fourth), axis=1)

    # Stack both languages sample-wise, offsetting the labels of the second
    # language so they do not collide with the first
    X_train = concatenate((X_train_first, X_train_third), axis=0)
    y_train = concatenate(
        (y_train_first, y_train_third + np.max(y_train_first) + 1), axis=0)

    if not lm_validation:
        X_valid = X_valid_first
        y_valid = y_valid_first
    else:
        X_valid = concatenate((X_valid_first, X_valid_third), axis=0)
        y_valid = concatenate(
            (y_valid_first, y_valid_third + np.max(y_valid_first) + 1), axis=0)

    if not lm_test:
        X_test = X_test_first
        y_test = y_test_first
    else:
        X_test = concatenate((X_test_first, X_test_third), axis=0)
        y_test = concatenate(
            (y_test_first, y_test_third + np.max(y_test_first) + 1), axis=0)

    return X_train, X_valid, X_test, y_train, y_valid, y_test
# %%
args = Terminal.get_args()

language = args['language']
method = 'svm'
library = args['representation']
people = args['people']
segments = args['segments']
normalization = args['normalization']
augment = args['augmentation']
sampling_rate = 24000
random_state = 42

# %%
file_path = Directory.processed_filename(
    language, library, sampling_rate, people, segments, augment)

# %%
X_train, y_train, mapping = Process.selection(
    file_path, valid_size=0, test_size=0, mapping=True, flat=True)

# %%
if normalization == 'minmax':
    from sklearn.preprocessing import MinMaxScaler

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(
        X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
elif normalization == 'standard':
    from sklearn.preprocessing import StandardScaler
library = args['representation']
people = args['people']
segments = args['segments']
sampling_rate = 24000
random_state = 42

# language = 'mixed'
# library = 'psf'
# people = None
# segments = None
# sampling_rate = 24000
# random_state = 42

# %%
global X_train, X_valid, X_test, y_train, y_valid, y_test

file_path = Directory.processed_filename(language, library, sampling_rate,
                                         people, segments)

# %%
if language == 'mixed' and library == 'mixed':
    first_folder = Directory.processed_filename('portuguese', 'psf',
                                                sampling_rate, None, None)
    second_folder = Directory.processed_filename('portuguese', 'melbanks',
                                                 sampling_rate, None, None)
    third_folder = Directory.processed_filename('english', 'psf',
                                                sampling_rate, people, segments)
    fourth_folder = Directory.processed_filename('english', 'melbanks',
                                                 sampling_rate, people, segments)

    X_train, X_valid, X_test, y_train, y_valid, y_test = Process.mixed_selection(
        first_folder,
# number of audios
n_audios = args['people'] or None
# feature libraries
library = args['representation']
# language
language = args['language'] or 'portuguese'
# signal normalization
normalization = args['normalization'] or 'nonorm'
# flat processing
flat = args['flat']
# path to the audio files
path = f'{language}/audios/{sampling_rate}'
# augmentation
augment = args['augmentation']

f = Directory.filenames(path)

# %%
def _noise(sample, rate):
    noise_max = np.max(sample)
    return Augmentation.noise_addition(
        sample, random.uniform(noise_max * 0.01, noise_max * 0.1))


def _cut(sample, rate):
    # random.randint expects integers, so cast the sample-count bounds;
    # the cut length is measured in samples, not seconds
    cut_samples = random.randint(int(sample.shape[0] * 0.2),
                                 int(sample.shape[0] * 0.6))
    pos_cut = random.randint(int(sample.shape[0] * 0.1),
                             int(sample.shape[0] * 0.9))
    return Augmentation.cut_signal(sample, pos_cut, cut_samples)
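# Usage sketch (assumes librosa is available in this script; the file path is
# hypothetical): apply both augmentations to one loaded signal.
import librosa

sample, sr = librosa.load('portuguese/audios/24000/speaker_0.wav', sr=24000)
noisy = _noise(sample, sr)
shortened = _cut(sample, sr)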
def mixed_selection_representation(first_folder, second_folder,
                                   validation=False, test=False,
                                   valid_size=0.25, test_size=0.2,
                                   random_state=42):
    global X_train, y_train, X_valid, y_valid, X_test, y_test
    from deep_audio import Directory
    from sklearn.model_selection import train_test_split
    from numpy import concatenate

    X_portuguese, y_portuguese, _ = Directory.load_json_data(first_folder)
    X_english, y_english, _ = Directory.load_json_data(second_folder)

    X_portuguese = flatten_matrix(X_portuguese)
    X_english = flatten_matrix(X_english)

    X_train_pt, X_test_pt, y_train_pt, y_test_pt = train_test_split(
        X_portuguese, y_portuguese, stratify=y_portuguese,
        test_size=test_size, random_state=random_state)
    X_train_pt, X_valid_pt, y_train_pt, y_valid_pt = train_test_split(
        X_train_pt, y_train_pt, stratify=y_train_pt, test_size=valid_size,
        random_state=random_state)

    X_train_en, X_test_en, y_train_en, y_test_en = train_test_split(
        X_english, y_english, stratify=y_english, test_size=test_size,
        random_state=random_state)
    X_train_en, X_valid_en, y_train_en, y_valid_en = train_test_split(
        X_train_en, y_train_en, stratify=y_train_en, test_size=valid_size,
        random_state=random_state)

    X_train = concatenate((X_train_pt, X_train_en), axis=1)
    y_train = y_train_pt

    if not validation:
        X_valid = X_valid_pt
    else:
        X_valid = concatenate((X_valid_pt, X_valid_en), axis=1)
    y_valid = y_valid_pt

    if not test:
        X_test = X_test_pt
    else:
        X_test = concatenate((X_test_pt, X_test_en), axis=1)
    y_test = y_test_pt

    return X_train, X_valid, X_test, y_train, y_valid, y_test
def mixed_selection_language(portuguese_folder, english_folder,
                             validation=False, test=False, valid_size=0.25,
                             test_size=0.2, random_state=42, flat=False,
                             squeeze=False):
    global X_train, y_train, X_valid, y_valid, X_test, y_test
    from deep_audio import Directory
    from sklearn.model_selection import train_test_split
    from numpy import concatenate, max
    from numpy import squeeze as np_squeeze  # avoid shadowing the parameter

    X_portuguese, y_portuguese, _ = Directory.load_json_data(portuguese_folder)
    X_english, y_english, _ = Directory.load_json_data(english_folder)

    if flat:
        X_portuguese = flatten_matrix(X_portuguese)
        X_english = flatten_matrix(X_english)

    # if squeeze:
    #     X_portuguese = np_squeeze(X_portuguese, axis=3)
    #     X_english = np_squeeze(X_english, axis=3)

    X_train_pt, X_test_pt, y_train_pt, y_test_pt = train_test_split(
        X_portuguese, y_portuguese, stratify=y_portuguese,
        test_size=test_size, random_state=random_state)
    X_train_pt, X_valid_pt, y_train_pt, y_valid_pt = train_test_split(
        X_train_pt, y_train_pt, stratify=y_train_pt, test_size=valid_size,
        random_state=random_state)

    X_train_en, X_test_en, y_train_en, y_test_en = train_test_split(
        X_english, y_english, stratify=y_english, test_size=test_size,
        random_state=random_state)
    X_train_en, X_valid_en, y_train_en, y_valid_en = train_test_split(
        X_train_en, y_train_en, stratify=y_train_en, test_size=valid_size,
        random_state=random_state)

    # Stack both languages sample-wise; offset the English labels so they do
    # not collide with the Portuguese ones
    X_train = concatenate((X_train_pt, X_train_en), axis=0)
    y_train = concatenate((y_train_pt, y_train_en + max(y_train_pt) + 1),
                          axis=0)

    if not validation:
        X_valid = X_valid_pt
        y_valid = y_valid_pt
    else:
        X_valid = concatenate((X_valid_pt, X_valid_en), axis=0)
        y_valid = concatenate((y_valid_pt, y_valid_en + max(y_valid_pt) + 1),
                              axis=0)

    if not test:
        X_test = X_test_pt
        y_test = y_test_pt
    else:
        X_test = concatenate((X_test_pt, X_test_en), axis=0)
        y_test = concatenate((y_test_pt, y_test_en + max(y_test_pt) + 1),
                             axis=0)

    return X_train, X_valid, X_test, y_train, y_valid, y_test
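# Usage sketch (folder arguments are hypothetical; assumes deep_audio is
# importable): merge both languages into a single label space, keeping the
# validation and test sets Portuguese-only by default.
from deep_audio import Directory

pt = Directory.processed_filename('portuguese', 'psf', 24000, None, None)
en = Directory.processed_filename('english', 'psf', 24000, None, None)
X_train, X_valid, X_test, y_train, y_valid, y_test = mixed_selection_language(
    pt, en, flat=True)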
# %%
args = Terminal.get_args(sys.argv[1:])

# %%
num_cores = multiprocessing.cpu_count()

language = args['language'] or 'portuguese'
origin_path = f'base_{language}'
dest_path = f'{language}/audios'
s_rate = [24000]
n_audios = args['people'] or None

print(dest_path)

# %%
f = Directory.filenames_recursive(origin_path)


def process_directory(dir, n_rate):
    signal = []

    for j, audioname in enumerate(f[dir]):
        holder_signal, sr = Audio.read(f'{origin_path}/{dir}/{audioname}',
                                       sr=n_rate)
        signal.extend(Audio.trim(holder_signal, 20))

    signal = array(signal)

    Audio.write(f'{dest_path}/{n_rate}/{dir}.wav', signal, n_rate)
language = args['language'] or 'portuguese'
library = args['representation'] or 'stft'
people = args['people'] or None
segments = args['segments'] or None
sampling_rate = 24000
random_state = 42
normalization = args['normalization'] or 'nonorm'
flat = args['flat']
epochs = 500
batch_size = 128

# %%
global X_train, X_valid, X_test, y_train, y_valid, y_test

file_path = Directory.processed_filename(
    language, library, sampling_rate, people, segments)

# %%
if language == 'mixed' and library == 'mixed':
    first_folder = Directory.processed_filename(
        'portuguese', 'psf', sampling_rate, None, None)
    second_folder = Directory.processed_filename(
        'portuguese', 'melbanks', sampling_rate, None, None)
    third_folder = Directory.processed_filename(
        'english', 'psf', sampling_rate, people, segments)
    fourth_folder = Directory.processed_filename(
        'english', 'melbanks', sampling_rate, people, segments)

    X_train, X_valid, X_test, y_train, y_valid, y_test = Process.mixed_selection(
        first_folder,
        second_folder,
        third_folder,
        fourth_folder,
        lm_validation=False,
        lm_test=False,