Exemplo n.º 1
0
def audio_mixup_augmentation(paths_list, cleared_audio_dir, alpha, num_classes,
                             sample_rate, num_coeffs, num_timesteps,
                             num_to_mix):
    """Create additional training samples by mixing neighbouring audio records.

    The de-duplicated, shuffled records are loaded and every record at
    position ``idx`` is blended with each of the ``num_to_mix - 1`` records
    that follow it.  A blend is ``lam * a + (1 - lam) * b`` with
    ``lam ~ Beta(alpha, alpha)``; the one-hot labels are blended with the same
    weight.  Both signals are truncated to the shorter of the two before
    mixing.  Features are then extracted from each blended signal and split
    into timestep samples, every one of which receives the blended label.

    Args:
        paths_list: Paths of the audio records to use; duplicates are dropped.
            The caller's list is not modified (a copy is shuffled).
        cleared_audio_dir: Directory passed to ``load_cleared_audio``.
        alpha: Both shape parameters of the Beta distribution that draws the
            mixing weight.
        num_classes: Number of classes passed to ``to_categorical``.
        sample_rate: Sample rate handed to ``FeatureExtractor``.
        num_coeffs: Number of coefficients handed to ``FeatureExtractor``.
        num_timesteps: Timestep count passed to
            ``compose_samples_with_timesteps``.
        num_to_mix: Size of the sliding window of records; each record is
            mixed with the ``num_to_mix - 1`` records after it.

    Returns:
        A tuple ``(features, labels)`` of NumPy arrays built from the
        accumulated timestep samples and their blended labels.
    """
    print('Started data augmentation using audio mixup...')
    time_start = datetime.datetime.now()
    mixup_features = []
    mixup_labels = []

    # dict.fromkeys keeps first-seen order while dropping duplicates and
    # yields a fresh list, so the in-place shuffle below does not touch the
    # caller's object.
    paths_list = list(dict.fromkeys(paths_list))
    num_records = len(paths_list)
    np.random.shuffle(paths_list)
    audio, labels = load_cleared_audio(cleared_audio_dir, paths_list)
    labels = to_categorical(labels, num_classes)

    print('Augmentation:')
    num_same_class = 0
    num_mixed = 0  # number of blends actually produced
    last_idx = num_records - num_to_mix
    for idx in range(last_idx + 1):
        for offset in range(1, num_to_mix):
            partner = idx + offset
            lam = np.random.beta(a=alpha, b=alpha)

            # Records may differ in length; mix only the overlapping part.
            audio_size = min(len(audio[idx]), len(audio[partner]))
            mixed_audio = (lam * audio[idx][:audio_size]
                           + (1 - lam) * audio[partner][:audio_size])
            mixed_lab = lam * labels[idx] + (1 - lam) * labels[partner]

            extractor = FeatureExtractor(mixed_audio, sample_rate, num_coeffs)
            mixed_feat = extractor.extract_log_mel_filterbank_energies()
            timestep_samples = compose_samples_with_timesteps(
                mixed_feat, num_timesteps)
            mixup_features.extend(timestep_samples)

            # Repeat the blended label once per timestep sample.
            stretched_label = np.tile(mixed_lab,
                                      (timestep_samples.shape[0], 1))
            mixup_labels.extend(stretched_label)

            num_mixed += 1
            if np.array_equal(labels[idx], labels[partner]):
                num_same_class += 1

        print_progress_bar(idx,
                           last_idx,
                           prefix='{}/{}'.format(idx + 1, last_idx + 1),
                           suffix='complete')

    time_end = datetime.datetime.now()
    print('Elapsed time: ', time_end - time_start)

    # Report the share of same-class blends relative to the blends that were
    # really made, expressed in percent; skip the division when no blend was
    # produced (empty input or num_to_mix < 2).
    if num_mixed:
        ratio = num_same_class / num_mixed
        print('%.2f%% are mixed from the same class,'
              '\n%.2f%% are mixed from different classes'
              % (100 * ratio, 100 * (1 - ratio)))
    else:
        print('No audio pairs were mixed.')
    return np.array(mixup_features), np.array(mixup_labels)
Exemplo n.º 2
0
def extract_mel_filterbank_energies(data, sample_rate, num_coeffs):
    """Return the log mel filterbank energies computed for ``data``.

    Builds a ``FeatureExtractor`` from the given signal, sample rate and
    coefficient count, and returns the result of its
    ``extract_log_mel_filterbank_energies`` method unchanged.
    """
    return FeatureExtractor(
        data_array=data,
        sample_rate=sample_rate,
        num_of_coeffs=num_coeffs,
    ).extract_log_mel_filterbank_energies()