Пример #1
0
def display_spectrogram():
    """
    Generate and display sample mel spectrograms from the first three
    20-second audio chunks.

    For each file: load audio, take the STFT, keep only the magnitude,
    build a log-magnitude mel spectrogram, and display it with a colorbar.
    """
    paths = prep_utils.get_absolute_file_paths(AUDIO_CHUNKS_20S_DIR)[:3]

    for path in paths:
        y, sr = librosa.load(path)

        # Short-time Fourier transform underlies most analysis.
        # librosa.stft returns a complex matrix D where D[f, t] is the
        # FFT value at frequency bin f, time (frame) t.
        D = librosa.stft(y)

        # Separate the magnitude and phase and only use the magnitude.
        S, phase = librosa.magphase(D)
        print("S Shape: ", S.shape)

        # Add a tiny epsilon before the log so silent frames
        # (magnitude 0) do not produce -inf values in the spectrogram.
        melspec_log = librosa.feature.melspectrogram(
            S=np.log(S + np.finfo(np.float32).eps), sr=sr)
        print("MelSpec Shape: ", melspec_log.shape)

        plt.figure()
        librosa.display.specshow(melspec_log, y_axis='mel', x_axis='time')
        plt.colorbar()
        plt.show()
Пример #2
0
def make_audio_chunks(seconds, dest_dir):
    """
    Split every dataset wav file into fixed-length clips and save each
    clip as its own wav file.

    The trailing chunk of each file (which may be shorter than the
    requested length) is discarded so all exported clips share the same
    duration.

    :param seconds: desired clip length in seconds
    :param dest_dir: output directory
    """
    paths = prep_utils.get_absolute_file_paths(DATASET_DIR, ".wav")

    start_time = time.time()
    for audio_path in paths:
        prep_utils.display_progress_eta(current_item=audio_path,
                                        total_items=paths,
                                        start_time=start_time)

        audio = AudioSegment.from_file(audio_path)
        chunk_length_ms = seconds * 1000
        chunks = make_chunks(audio, chunk_length_ms)
        # Drop the last (possibly partial) chunk; guard against an empty
        # list so we never raise IndexError on a zero-length input.
        if chunks:
            chunks.pop()

        # Export all of the individual chunks as wav files
        for i, chunk in enumerate(chunks):
            _, chunk_name = os.path.split(
                os.path.splitext(audio_path)[0] + "_chunk_{0}.wav".format(i))
            # os.path.join works whether or not dest_dir ends with a
            # path separator; plain "+" silently built a wrong path.
            chunk.export(os.path.join(dest_dir, chunk_name), format="wav")

    print("\n\nChunks export completed.")
Пример #3
0
def downsample():
    """
    Resize every saved STFT matrix to 256x256 (with anti-aliasing) and
    write the result back to disk as a .npy file.
    """
    for stft_path in prep_utils.get_absolute_file_paths(PROCESSED_STFT_DIR):
        matrix = np.load(stft_path)
        resized = skimage.transform.resize(matrix, (256, 256),
                                           anti_aliasing=True)
        target = RESIZED_STFT_DIR + prep_utils.get_filename(stft_path) + ".npy"
        np.save(target, resized)
Пример #4
0
def audio_reconstruction():
    """
    Reconstruct sample audio clips from saved STFT magnitude matrices
    via the Griffin-Lim algorithm, and write each clip to a wav file.
    """
    for stft_path in prep_utils.get_absolute_file_paths(STFT_ARRAY_DIR):
        magnitude = np.load(stft_path)
        waveform = librosa.griffinlim(magnitude)

        wav_path = AUDIO_OUT_DIR + prep_utils.get_filename(stft_path) + ".wav"

        # Persist the reconstructed signal (22050 Hz is librosa's
        # default load rate).
        scipy.io.wavfile.write(wav_path, 22050, waveform)
Пример #5
0
def record_mean_std():
    """
    Record the mean and std of every log-magnitude STFT matrix and save
    the statistics, keyed by source path, to a CSV file.
    """
    paths = prep_utils.get_absolute_file_paths(STFT_ARRAY_DIR)

    mean_list = []
    std_list = []

    for path in paths:
        S = np.load(path)
        # Epsilon guards against log(0) = -inf for silent bins, which
        # would otherwise poison the recorded mean/std with -inf or NaN.
        S = np.log(S + np.finfo(np.float32).eps)
        mag_mean = np.mean(S)
        mag_std = np.std(S)
        mean_list.append(mag_mean)
        std_list.append(mag_std)
        print("Finished:", path)

    data = {"mean": mean_list, "std": std_list, "path": paths}
    df = pd.DataFrame.from_dict(data)
    df.to_csv("./data/saved_mean_std.csv")
Пример #6
0
def _apply_noise_reduction(wav_path, out_path):
    """
    Denoise a reconstructed wav file and save the cleaned audio.

    :param wav_path: path of the wav file to denoise
    :param out_path: output path prefix; result is written to
        <out_path>_nr.wav
    """
    rate, data = scipy.io.wavfile.read(wav_path)
    # The clip serves as its own noise profile.
    reduced_noise = nr.reduce_noise(audio_clip=data,
                                    noise_clip=data,
                                    verbose=False)
    sf.write(out_path + "_nr.wav", reduced_noise, rate)


def audio_reconstruction_stylegan(src_dir,
                                  dest_dir,
                                  resize_h,
                                  resize_w,
                                  mode="RGB"):
    """
    Image to Audio reconstruction post StyleGAN image generation.

    :param src_dir: directory of fake images generated by StyleGAN
    :param dest_dir: destination directory where converted audio will be saved
    :param resize_h: height of the desired image dimension
    :param resize_w: width of the desired image dimension
    :param mode: "RGB" or "grayscale", generated image type by StyleGAN
    """
    src_dir, sub_dir = ar_utils.select_images_iteration(directory=src_dir)
    paths = prep_utils.get_absolute_file_paths(src_dir)

    out_dir = dest_dir + sub_dir
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    start_time = time.time()
    for path in paths:
        prep_utils.display_progress_eta(current_item=path,
                                        total_items=paths,
                                        start_time=start_time)
        # Common output prefix for every artifact derived from this image.
        out_path = out_dir + prep_utils.get_filename(path)

        if mode == "RGB":
            # Collapse the generated RGB image to grayscale and resize it
            # back to the original STFT dimensions.
            image = cv2.imread(path)
            image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            S_recovered = np.array(image_gray, dtype=np.float32)
            S_recovered = cv2.resize(S_recovered, (resize_w, resize_h),
                                     interpolation=cv2.INTER_CUBIC)

            # Min-max normalize the pixel values into [-1, 1].
            S = (S_recovered - np.min(S_recovered)) / (
                np.max(S_recovered) - np.min(S_recovered)) * 2 - 1
            pd.DataFrame(S).to_csv(out_path + "_norm.csv",
                                   header=None,
                                   index=False)
            plt.imsave(out_path + "_norm.png", S)

            # Undo the STFT normalization applied before training.
            S = ar_utils.unnormalize_stft(s=S)
            pd.DataFrame(S).to_csv(out_path + "_reconstruct.csv",
                                   header=None,
                                   index=False)
            plt.imsave(out_path + "_reconstruct.png", S)

            # Phase reconstruction via Griffin-Lim, then save the audio.
            y = librosa.griffinlim(S)
            wav_out = out_path + ".wav"
            scipy.io.wavfile.write(wav_out, 22050, y)

            _apply_noise_reduction(wav_out, out_path)
        elif mode == "grayscale":
            S = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            S = np.array(S, dtype=np.float32)
            S_recovered = S
            pd.DataFrame(S_recovered).to_csv(out_path + "_original.csv",
                                             header=None,
                                             index=False)
            cv2.imwrite(out_path + "_original.png", S_recovered)

            S_recovered = cv2.resize(S_recovered, (resize_w, resize_h),
                                     interpolation=cv2.INTER_CUBIC)
            S_recovered = ar_utils.decrease_brightness(S_recovered)

            pd.DataFrame(S_recovered).to_csv(out_path + "_recovered.csv",
                                             header=None,
                                             index=False)
            cv2.imwrite(out_path + "_recovered.png", S_recovered)

            # Phase reconstruction via Griffin-Lim, then save the audio.
            y = librosa.griffinlim(S_recovered)
            wav_out = out_path + "_recovered.wav"
            scipy.io.wavfile.write(wav_out, 22050, y)

            _apply_noise_reduction(wav_out, out_path)