Example 1
    def save_mel(self, wav_path, basename, key_name, mel_path):
        if not os.path.isfile("{}/{}_{}.png".format(mel_path, os.path.basename(basename), key_name)):
            (audio, _) = read_audio(wav_path, cfg.sample_rate)
            ham_win = np.hamming(cfg.n_window)

            spec = librosa.stft(
                audio,
                n_fft=cfg.n_window,
                hop_length=cfg.hop_length,
                window=ham_win,
                center=True,
                pad_mode='reflect'
            )

            mel_spec = librosa.feature.melspectrogram(
                S=np.abs(spec),  # amplitude spectrogram; for an energy (power) spectrogram use np.abs(spec) ** 2 and switch amplitude_to_db to power_to_db below.
                sr=cfg.sample_rate,
                n_mels=cfg.n_mels,
                fmin=cfg.f_min, fmax=cfg.f_max,
                htk=False, norm=None)

            mel_spec = librosa.amplitude_to_db(mel_spec)  # 10 * log10(S**2 / ref), ref default is 1
            # mel_spec = mel_spec.T
            mel_spec = mel_spec.astype(np.float32)

            librosa.display.specshow(mel_spec, x_axis='frames', y_axis='hz')
            plt.title("{}_{}".format(basename, key_name))
            cb = plt.colorbar()
            cb.set_label("db")
            plt.savefig("{}/{}_{}.png".format(mel_path, os.path.basename(basename), key_name))
            plt.close()
            print("save_mel:{}".format(basename))
Example 2
    def extract_features_from_meta(self,
                                   csv_audio,
                                   feature_dir,
                                   subpart_data=None):
        """Extract log mel spectrogram features.

        Args:
            csv_audio : str, csv file containing names, durations and labels: (name, start, end, label, label_index);
                the associated wav filename is Yname_start_end.wav
            feature_dir: str, the path to the directory where the features are saved.
            subpart_data: int, number of files from the csv to extract features for.
        """
        t1 = time.time()
        df_meta = self.get_df_from_meta(csv_audio, subpart_data)
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        for ind, wav_name in enumerate(df_meta.filename.unique()):
            if ind % 500 == 0:
                LOG.debug(ind)
            wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
            wav_path = os.path.join(wav_dir, wav_name)

            out_filename = os.path.join(feature_dir,
                                        name_only(wav_name) + ".npy")

            if not os.path.exists(out_filename):
                if not os.path.isfile(wav_path):
                    LOG.error(
                        "File %s is in the csv file but the feature is not extracted!"
                        % wav_path)
                    df_meta = df_meta.drop(
                        df_meta[df_meta.filename == wav_name].index)
                else:
                    (audio, _) = read_audio(wav_path, cfg.sample_rate)
                    if audio.shape[0] == 0:
                        print("File %s is corrupted!" % wav_path)
                    else:
                        mel_spec = self.calculate_mel_spec(
                            audio, log_feature=self.save_log_feature)

                        np.save(out_filename, mel_spec)

                    LOG.debug("compute features time: %s" % (time.time() - t1))

        return df_meta.reset_index(drop=True)
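The pattern above (iterate over the unique filenames, skip features that are already cached on disk, and drop csv rows whose audio is missing or unreadable) can be sketched standalone as below. read_audio, cfg and calculate_mel_spec are not shown in this snippet, so direct librosa calls and placeholder parameters are substituted; this illustrates the caching logic rather than the project's implementation, and df_meta is assumed to be a pandas DataFrame with a filename column.

    import os
    import numpy as np
    import librosa

    def extract_missing_features(df_meta, wav_dir, feature_dir, sr=16000, n_mels=64):
        os.makedirs(feature_dir, exist_ok=True)
        kept = []
        for wav_name in df_meta.filename.unique():
            wav_path = os.path.join(wav_dir, wav_name)
            if not os.path.isfile(wav_path):
                continue                      # missing on disk: its rows are dropped below
            out_path = os.path.join(feature_dir, os.path.splitext(wav_name)[0] + ".npy")
            if not os.path.exists(out_path):  # only compute features that are not cached yet
                audio, _ = librosa.load(wav_path, sr=sr)
                if audio.shape[0] == 0:
                    continue                  # unreadable/empty file: drop it as well
                mel = librosa.power_to_db(
                    librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels))
                np.save(out_path, mel.T.astype(np.float32))  # (frames, n_mels)
            kept.append(wav_name)
        return df_meta[df_meta.filename.isin(kept)].reset_index(drop=True)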
Example 3
    def get_features(self, wav_path, feature_dir, frames):
        (audio, _) = read_audio(wav_path, cfg.sample_rate)
        mel_spec = self.calculate_mel_spec(audio,
                                           log_feature=self.save_log_feature)

        # Truncate the data so its length is a multiple of `frames` (short clips are
        # padded up to `frames`). Adjust the target length computation below if you
        # want padding instead of truncation.
        if frames > mel_spec.shape[0]:
            pad_trunc_length = frames
        else:
            pad_trunc_length = mel_spec.shape[0] - mel_spec.shape[0] % frames
        mel_spec = pad_trunc_seq(mel_spec, pad_trunc_length)

        # Reshape in multiple segments and save them
        mel_spec_frames = mel_spec.reshape(-1, frames, mel_spec.shape[-1])
        out_filenames = []
        wav_name = os.path.basename(wav_path)
        for cnt, sample in enumerate(mel_spec_frames):
            out_filename = os.path.join(
                feature_dir,
                f"{name_only(wav_name)}fr{frames}_{cnt * frames}-{(cnt + 1) * frames}.npy")
            np.save(out_filename, sample)
            out_filenames.append(out_filename)
        cnt_max = len(mel_spec_frames)
        return audio, cnt_max
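The pad/truncate-and-reshape step is easy to get wrong, so here is a small standalone sketch of just that part. pad_trunc_seq is replaced by an explicit zero-pad/slice, and the shapes in the comments assume a (frames, n_mels) feature matrix as in the method above.

    import numpy as np

    def split_into_segments(mel_spec, frames):
        # mel_spec: (time, n_mels). Short clips are padded up to `frames`;
        # otherwise the trailing remainder that does not fill a segment is dropped.
        t = mel_spec.shape[0]
        target = frames if t < frames else t - t % frames
        if target > t:
            pad = np.zeros((target - t, mel_spec.shape[1]), dtype=mel_spec.dtype)
            mel_spec = np.concatenate([mel_spec, pad], axis=0)
        else:
            mel_spec = mel_spec[:target]
        return mel_spec.reshape(-1, frames, mel_spec.shape[-1])

    segments = split_into_segments(np.random.rand(1000, 64).astype(np.float32), frames=128)
    print(segments.shape)  # (7, 128, 64): 1000 frames -> 896 kept -> 7 segments of 128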
Example 4
    def extract_features_from_meta(self,
                                   csv_audio,
                                   subpart_data=None,
                                   training=False):
        """Extract log mel spectrogram features.

        Args:
            csv_audio : str, csv file containing names, durations and labels: (name, start, end, label, label_index);
                the associated wav filename is Yname_start_end.wav
            subpart_data: int, number of files from the csv to extract features for.
            training: bool, if True, additional augmentation variants may be generated (all entries are currently commented out).
        """
        t1 = time.time()
        df_meta = self.get_df_from_meta(csv_audio, subpart_data)
        df_all = list()
        feature_fns = list()
        LOG.info('Extracting/loading features')
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        augmentation_funcs = [
            ('orig', None),  # original signal
        ]

        if training:
            augmentation_funcs += [
                # ('lpf4k', partial(lpf, wc=4000, fs=cfg.sample_rate)),
                # ('lpf8k', partial(lpf, wc=8000, fs=cfg.sample_rate)),
                # ('lpf16k', partial(lpf, wc=16000, fs=cfg.sample_rate)),
                # ('ps-6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-6)),
                # ('ps-3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=-3)),
                # ('ps+3', partial(pitch_shift, sr=cfg.sample_rate, n_steps=3)),
                # ('ps+6', partial(pitch_shift, sr=cfg.sample_rate, n_steps=6)),
                # ('ts1.25', partial(time_stretch, rate=1.25)),
                # ('ts1.5', partial(time_stretch, rate=1.5)),
                # ('amp0.5', partial(amplitude_scale, coeff=0.5)),
                # ('amp0.75', partial(amplitude_scale, coeff=0.75)),
                # ('hp0.25', partial(hp_reweight, lam=0.25)),
                # ('hp0.75', partial(hp_reweight, lam=0.75))
            ]

        wav_fns = df_meta.filename.unique()
        flag = False  # set to True to print the multi-entry meta length once (debugging aid)
        for ind, wav_name in tqdm(enumerate(wav_fns), total=len(wav_fns)):
            if ind % 500 == 0:
                LOG.debug(ind)

            # verify the audio file is present
            wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
            wav_path = os.path.join(wav_dir, wav_name)
            if os.path.isfile(wav_path):
                # defer loading audio until the need for feature extraction is verified
                audio = None

                # perform all augmentations (including no augmentation)
                for name, func in augmentation_funcs:
                    if name == 'orig':
                        out_filename = os.path.splitext(wav_name)[0] + ".npy"
                    else:
                        out_filename = os.path.splitext(
                            wav_name)[0] + '_' + name + ".npy"
                    out_path = os.path.join(self.feature_dir, out_filename)

                    # add the metadata
                    meta = df_meta.loc[df_meta.filename == wav_name]
                    df_all.append(meta)

                    # for synthetic data with time annotation of events, the meta df will have several entries for
                    # each wav file. therefore, we need to append the feature filename len(meta) times.
                    if len(meta) > 1:
                        feature_fns += [out_filename] * len(meta)
                        if flag:
                            print('Length of meta: {}'.format(len(meta)))
                            flag = False
                    else:
                        feature_fns.append(out_filename)

                    if not os.path.exists(out_path):
                        if audio is None:
                            (audio, _) = read_audio(wav_path, cfg.sample_rate)
                            if audio.shape[0] == 0:
                                print("File %s is corrupted!" % wav_path)
                                # drop the entries just added for this file and skip it entirely
                                del feature_fns[-len(meta):]
                                del df_all[-1]
                                break

                        # perform any augmentation, extract features, save features
                        # LOG.info('extracting {}'.format(out_filename))
                        if func is not None:
                            mel_spec = self.calculate_mel_spec(func(audio))
                        else:
                            mel_spec = self.calculate_mel_spec(audio)
                        np.save(out_path, mel_spec)

                        LOG.debug("compute features time: %s" %
                                  (time.time() - t1))
            else:
                LOG.error(
                    "File %s is in the csv file but the feature is not extracted!"
                    % wav_path)
                # df_meta = df_meta.drop(df_meta[df_meta.filename == wav_name].index)

        # form the final DataFrame of meta data for features from original and augmented audio
        df_all = pd.concat(df_all).reset_index(drop=True)
        df_all['feature_filename'] = feature_fns

        return df_all
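The (tag, callable) augmentation registry used above can be exercised on its own. The commented-out helpers (lpf, pitch_shift, time_stretch, amplitude_scale, hp_reweight) are not shown in this snippet, so the sketch below substitutes librosa's built-in effects and a trivial gain function, and the sample rate is a placeholder.

    from functools import partial
    import numpy as np
    import librosa

    def amplitude_scale(y, coeff):
        # trivial stand-in for the amplitude_scale helper referenced above
        return coeff * y

    sr = 16000
    augmentations = [
        ('orig', None),                                                    # unmodified signal
        ('ps+3', partial(librosa.effects.pitch_shift, sr=sr, n_steps=3)),  # pitch shift up 3 semitones
        ('ts1.25', partial(librosa.effects.time_stretch, rate=1.25)),      # speed up by 25%
        ('amp0.5', partial(amplitude_scale, coeff=0.5)),                   # halve the amplitude
    ]

    y = np.random.randn(sr).astype(np.float32)  # one second of noise as dummy audio
    for tag, func in augmentations:
        y_aug = y if func is None else func(y)
        # each variant would then go through calculate_mel_spec and be saved as <name>_<tag>.npy
        print(tag, y_aug.shape)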
Example 5
    def extract_features_from_meta_segment(self,
                                           csv_audio,
                                           feature_dir,
                                           subpart_data=None,
                                           fixed_segment=None):
        """Extract log mel spectrogram features, but the csv needs to be strongly labeled.

        Args:
            csv_audio : str, file containing names, durations and labels : (name, start, end, label, label_index)
                the associated wav_filename is Yname_start_end.wav
            feature_dir: str, the path of the features directory.
            subpart_data: int, number of files to extract features from the csv.
            fixed_segment: float, in seconds, the size of the segment to keep. If it is longer than the audio,
                the full audio length is kept. If it is longer than the labeled event, the surrounding audio is
                included as well (which allows creating weak labels).
        """
        t1 = time.time()
        df_meta = self.get_df_from_meta(csv_audio, subpart_data)
        self.get_classes(df_meta)
        LOG.info("{} Total file number: {}".format(
            csv_audio, len(df_meta.filename.unique())))

        ext_name = "_segment_"
        if subpart_data:
            ext_name += str(subpart_data)

        if fixed_segment is not None:
            LOG.debug(
                f" durations before: "
                f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}"
            )
            ext_name += f"fix{fixed_segment}"
            df_meta = self.trunc_pad_segment(df_meta, fixed_segment)
            LOG.debug(
                f" durations after: "
                f"{df_meta.groupby('event_label').apply(lambda x: (x.offset - x.onset).mean())}"
            )

        meta_base, meta_ext = os.path.splitext(csv_audio.split("/")[-1])
        csv_features = os.path.join(self.metadata_dir,
                                    meta_base + ext_name + meta_ext)

        wav_dir = self.get_audio_dir_path_from_meta(csv_audio)
        df_features = pd.DataFrame()

        path_exists = os.path.exists(csv_features)

        if not path_exists:
            # Loop in all the filenames
            for ind, wav_name in enumerate(df_meta.filename.unique()):
                if ind % 500 == 0:
                    LOG.debug(ind)

                wav_path = os.path.join(wav_dir, wav_name)
                if not os.path.isfile(wav_path):
                    LOG.error(
                        "File %s is in the csv file but the feature is not extracted, deleting...!"
                        % wav_path)
                    df_meta = df_meta.drop(
                        df_meta[df_meta.filename == wav_name].index)
                else:
                    try:
                        audio_len_sec = soundfile.info(wav_path).duration
                    except Exception as e:
                        print("File %s is corrupted, not added to df!" %
                              wav_path)
                        print(e)
                        continue
                    if audio_len_sec == 0:
                        print("File %s is corrupted, not added to df!" %
                              wav_path)
                    else:
                        files_exist = True
                        # How many segments (one per annotated row) can we compute from this file?
                        sub_df = df_meta[df_meta.filename == wav_name]
                        cnt_max = len(sub_df)

                        if cnt_max == 0:
                            continue  # no metadata rows for this file (should not happen); skip it rather than stopping the loop

                        base_wav_name = name_only(wav_name)
                        ext_featname = "_seg"
                        if fixed_segment:
                            ext_featname += f"fix{fixed_segment}"
                            files_exist = False  # We should always recompute because of the randomness of onset offset
                        # Check if files already exist
                        # Mirror the naming used when saving below (the first segment has no index suffix)
                        out_filenames = [
                            base_wav_name + ext_featname + (str(cnt) if cnt else "") + ".npy"
                            for cnt in range(cnt_max)
                        ]
                        for fname in out_filenames:
                            fpath = os.path.join(feature_dir, fname)
                            if not os.path.exists(fpath):
                                files_exist = False
                                break

                        add_item = {
                            "raw_filename": [],
                            "filename": [],
                            "event_labels": []
                        }
                        for ii, (i, row) in enumerate(sub_df.iterrows()):
                            if not pd.isna(row.event_label):
                                if ii > 0:
                                    extnb = str(ii)
                                else:
                                    extnb = ""
                                out_filename = os.path.join(
                                    feature_dir, name_only(wav_name))
                                out_filename += ext_featname + extnb + ".npy"
                                if not files_exist:
                                    sr = soundfile.info(wav_path).samplerate
                                    (audio,
                                     _) = read_audio(wav_path,
                                                     cfg.sample_rate,
                                                     start=int(row.onset * sr),
                                                     stop=int(row.offset * sr))
                                    mel_spec = self.calculate_mel_spec(
                                        audio,
                                        log_feature=self.save_log_feature)
                                    if fixed_segment:
                                        pad_trunc_length = int(
                                            fixed_segment * cfg.sample_rate //
                                            cfg.hop_length)
                                        mel_spec = pad_trunc_seq(
                                            mel_spec, pad_trunc_length)
                                    np.save(out_filename, mel_spec)

                                add_item["raw_filename"].append(wav_name)
                                add_item["filename"].append(out_filename)
                                add_item["event_labels"].append(
                                    row["event_label"])

                        df_features = pd.concat(
                            [df_features, pd.DataFrame(add_item)],
                            ignore_index=True)

            df_features.to_csv(csv_features,
                               sep="\t",
                               header=True,
                               index=False)
            df_features = pd.read_csv(
                csv_features,
                sep="\t")  # Otherwise event_labels is "" and not NaN
        else:
            df_features = self.get_df_from_meta(
                csv_features)  # No subpart data because should be in the name

        LOG.debug("compute features time: %s" % (time.time() - t1))
        return df_features
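The core of this method (read only the annotated slice of the wav, compute a log-mel patch, and pad or truncate it to a fixed number of frames) can be sketched as a standalone function. soundfile and librosa stand in for the project's read_audio, calculate_mel_spec and pad_trunc_seq helpers, and the sample_rate/hop_length/n_mels defaults are placeholders rather than the values in cfg.

    import numpy as np
    import soundfile
    import librosa

    def event_log_mel(wav_path, onset, offset, fixed_segment=None,
                      sample_rate=16000, hop_length=512, n_mels=64):
        native_sr = soundfile.info(wav_path).samplerate
        # Read only the annotated slice of the file, then down-mix and resample.
        audio, _ = soundfile.read(wav_path,
                                  start=int(onset * native_sr),
                                  stop=int(offset * native_sr))
        if audio.ndim > 1:
            audio = audio.mean(axis=1)
        audio = librosa.resample(audio, orig_sr=native_sr, target_sr=sample_rate)
        mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate,
                                             hop_length=hop_length, n_mels=n_mels)
        mel = librosa.power_to_db(mel).T  # (frames, n_mels)
        if fixed_segment is not None:
            # Pad with zeros or truncate to a fixed frame count, like pad_trunc_seq above.
            target = int(fixed_segment * sample_rate // hop_length)
            pad = max(0, target - mel.shape[0])
            mel = np.pad(mel, ((0, pad), (0, 0)))[:target]
        return mel.astype(np.float32)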