示例#1
0
def convert_audio_to_stft(src_dir, dest_dir, extension):
    """
    Save the STFT magnitude of every pending audio clip as a NumPy array file.

    The clips to process are whatever ``prep_utils.get_unprocessed_items``
    returns for the source/destination pair. Each clip is loaded with
    librosa's default settings, transformed with ``librosa.stft`` and reduced
    to its magnitude; the phase component is discarded.

    :param src_dir: input audio directory
    :param dest_dir: output STFT directory; it is concatenated directly with
        the file name, so it is expected to end with a path separator
    :param extension: suffix appended to each output file name (note that
        ``np.save`` adds ``.npy`` itself when the name does not end with it)
    """
    pending = prep_utils.get_unprocessed_items(src_dir=src_dir,
                                               dest_dir=dest_dir)
    started_at = time.time()

    for clip_path in pending:
        prep_utils.display_progress_eta(current_item=clip_path,
                                        total_items=pending,
                                        start_time=started_at)

        # The sample rate returned alongside the samples is not used here.
        samples, _ = librosa.load(clip_path)

        # Short-time Fourier transform, then keep the magnitude part only.
        magnitude = librosa.magphase(librosa.stft(samples))[0]

        target = dest_dir + prep_utils.get_filename(clip_path) + extension
        np.save(target, magnitude)
示例#2
0
def make_audio_chunks(seconds, dest_dir):
    """
    Split every ``.wav`` file under ``DATASET_DIR`` into fixed-length clips and export them.

    Only full-length clips are written. A trailing remainder shorter than the
    requested length is discarded, while a final clip that happens to be
    exactly full length is kept. Files that yield no chunks at all are
    skipped instead of raising.

    :param seconds: desired clip length, in seconds
    :param dest_dir: output directory; it is concatenated directly with the
        clip file name, so it is expected to end with a path separator
    """
    paths = prep_utils.get_absolute_file_paths(DATASET_DIR, ".wav")

    # pydub works in milliseconds; this value does not change per file.
    chunk_length_ms = seconds * 1000

    start_time = time.time()
    for audio_path in paths:
        prep_utils.display_progress_eta(current_item=audio_path,
                                        total_items=paths,
                                        start_time=start_time)

        audio = AudioSegment.from_file(audio_path)
        chunks = make_chunks(audio, chunk_length_ms)

        # Drop the last chunk only when it is an incomplete remainder.
        # len() of an AudioSegment is its duration in milliseconds.
        if chunks and len(chunks[-1]) < chunk_length_ms:
            chunks.pop()

        # Source file name without directory or extension.
        base_name = os.path.basename(os.path.splitext(audio_path)[0])

        # Export all of the individual chunks as wav files
        for i, chunk in enumerate(chunks):
            chunk_name = "{0}_chunk_{1}.wav".format(base_name, i)
            chunk.export(dest_dir + chunk_name, format="wav")

    print("\n\nChunks export completed.")
示例#3
0
def convert_stft_to_images_grayscale(src_dir, dest_dir, ext=".png", size=None):
    """
    Write each pending STFT matrix to disk as an image via OpenCV.

    Every matrix returned by ``prep_utils.get_unprocessed_items`` is loaded
    with ``np.load``, passed through ``prep_utils.increase_brightness``,
    optionally resized, and saved with ``cv2.imwrite``.

    :param src_dir: source directory that stores STFT matrices
    :param dest_dir: destination for the images; concatenated directly with
        the file name, so it is expected to end with a path separator
    :param ext: image extension appended to each output file name
    :param size: when truthy, the image is resized to ``size`` x ``size``
        using bicubic interpolation
    """
    pending = prep_utils.get_unprocessed_items(src_dir=src_dir,
                                               dest_dir=dest_dir)
    started_at = time.time()

    for matrix_path in pending:
        prep_utils.display_progress_eta(current_item=matrix_path,
                                        total_items=pending,
                                        start_time=started_at)

        image = prep_utils.increase_brightness(np.load(matrix_path))

        if size:
            square = (size, size)
            image = cv2.resize(image, square, interpolation=cv2.INTER_CUBIC)

        file_name = prep_utils.get_filename(matrix_path) + ext
        cv2.imwrite(dest_dir + file_name, image)
示例#4
0
def convert_stft_to_images(src_dir, dest_dir, ext=".png", size=None):
    """
    Render each pending STFT matrix as an image file in the destination folder.

    Every matrix returned by ``prep_utils.get_unprocessed_items`` is loaded,
    passed through ``normalize_stft``, optionally resized, and written with
    ``plt.imsave``. The saved file is then read back with OpenCV and
    overwritten in place.

    :param src_dir: source folder where STFT matrices are stored
    :param dest_dir: output images folder; concatenated directly with the
        file name, so it is expected to end with a path separator
    :param ext: image format, defaulted to .png
    :param size: when truthy, side length of the square image to resize to
        (bicubic interpolation)
    """
    pending = prep_utils.get_unprocessed_items(src_dir=src_dir,
                                               dest_dir=dest_dir)
    started_at = time.time()

    for matrix_path in pending:
        prep_utils.display_progress_eta(current_item=matrix_path,
                                        total_items=pending,
                                        start_time=started_at)

        normalized = normalize_stft(np.load(matrix_path))

        if size:
            square = (size, size)
            normalized = cv2.resize(normalized, square,
                                    interpolation=cv2.INTER_CUBIC)

        image_path = dest_dir + prep_utils.get_filename(matrix_path) + ext
        plt.imsave(image_path, normalized)

        # Round-trip the file through OpenCV: read with imread's default flag
        # and write it over itself. Presumably this strips the alpha channel
        # that matplotlib writes -- TODO confirm.
        cv2.imwrite(image_path, cv2.imread(image_path))
def audio_reconstruction_test(src_dir, dest_dir, ext=".png", size=None):
    """
    Exercise one image-to-audio round trip and dump every intermediate stage.

    Only the first pending item is processed: the loop breaks after a single
    pass. For that item the raw matrix and its brightened version are shown
    in an OpenCV window (each waits for a key press), intermediate matrices
    are written as CSV files, and the audio rebuilt with Griffin-Lim is saved
    as ``s.wav``. All outputs use fixed file names inside ``dest_dir``.

    :param src_dir: image directory
    :param dest_dir: audio directory; concatenated directly with the output
        file names, so it is expected to end with a path separator
    :param ext: image type
    :param size: when truthy, the recovered matrix is resized to
        ``size`` x ``size`` before being dumped
    """

    def dump_csv(matrix, file_name):
        # Headerless, index-free CSV of a matrix inside dest_dir.
        pd.DataFrame(matrix).to_csv(dest_dir + file_name,
                                    header=None,
                                    index=False)

    def preview(matrix):
        # Show the matrix and block until a key is pressed.
        cv2.imshow("image", matrix)
        cv2.waitKey(0)

    pending = prep_utils.get_unprocessed_items(src_dir=src_dir,
                                               dest_dir=dest_dir)
    started_at = time.time()

    for item in pending:
        prep_utils.display_progress_eta(current_item=item,
                                        total_items=pending,
                                        start_time=started_at)

        # Stage 1: the matrix exactly as stored on disk.
        stft = np.load(item)
        preview(stft)
        dump_csv(stft, "S.csv")

        # Stage 2: brightened matrix, also written out as an image.
        brightened = prep_utils.increase_brightness(stft)
        preview(brightened)
        dump_csv(brightened, "S_scaled.csv")

        gray_path = dest_dir + "gray" + ext
        cv2.imwrite(gray_path, brightened)

        # Stage 3: read the image back (flag 0 = grayscale) as float32.
        recovered = np.array(cv2.imread(gray_path, 0), dtype=np.float32)
        if size:
            recovered = cv2.resize(recovered, (size, size),
                                   interpolation=cv2.INTER_CUBIC)
        dump_csv(recovered, "S_recovered.csv")
        cv2.imwrite(dest_dir + "resized" + ext, recovered)

        # Stage 4: reload from the CSV and resize; cv2 takes (width, height),
        # so the result has 1025 rows and 431 columns.
        from_csv = np.genfromtxt(dest_dir + "S_recovered.csv", delimiter=',')
        from_csv = np.array(from_csv, dtype=np.float32)
        from_csv = cv2.resize(from_csv, (431, 1025),
                              interpolation=cv2.INTER_CUBIC)

        waveform = librosa.griffinlim(from_csv)

        # Save reconstructed data
        scipy.io.wavfile.write(dest_dir + "s.wav", 22050, waveform)
        break
def _write_wav_and_denoised(samples, wav_path, denoised_path,
                            sample_rate=22050):
    """
    Write ``samples`` to ``wav_path`` and a noise-reduced copy to ``denoised_path``.

    :param samples: audio samples to store
    :param wav_path: path of the plain WAV file
    :param denoised_path: path of the noise-reduced WAV file
    :param sample_rate: sample rate used when writing ``wav_path``
    """
    scipy.io.wavfile.write(wav_path, sample_rate, samples)

    # Noise reduction operates on the data read back from the file just
    # written; the clip itself is also passed as the noise profile.
    rate, data = scipy.io.wavfile.read(wav_path)
    reduced_noise = nr.reduce_noise(audio_clip=data,
                                    noise_clip=data,
                                    verbose=False)
    sf.write(denoised_path, reduced_noise, rate)


def audio_reconstruction_stylegan(src_dir,
                                  dest_dir,
                                  resize_h,
                                  resize_w,
                                  mode="RGB"):
    """
    Image to Audio reconstruction post StyleGAN image generation.

    For every image in the selected iteration folder the image is turned back
    into a matrix, intermediate matrices are dumped as CSV/PNG files, audio is
    rebuilt with Griffin-Lim, and both a plain and a noise-reduced WAV file
    are written. All outputs go to ``dest_dir + sub_dir``, where ``sub_dir``
    comes from ``ar_utils.select_images_iteration``.

    :param src_dir: directory of fake images generated by StyleGAN
    :param dest_dir: destination directory where converted audio will be saved
    :param resize_h: height of the desired image dimension
    :param resize_w: width of the desired image dimension
    :param mode: "RGB" or "grayscale", generated image type by StyleGAN
    :raises ValueError: if ``mode`` is neither "RGB" nor "grayscale"
    """
    # Fail fast: an unknown mode would otherwise iterate over every image
    # without producing any output.
    if mode not in ("RGB", "grayscale"):
        raise ValueError(
            'mode must be "RGB" or "grayscale", got {0!r}'.format(mode))

    src_dir, sub_dir = ar_utils.select_images_iteration(directory=src_dir)
    paths = prep_utils.get_absolute_file_paths(src_dir)

    out_dir = dest_dir + sub_dir
    os.makedirs(out_dir, exist_ok=True)

    start_time = time.time()
    for path in paths:
        prep_utils.display_progress_eta(current_item=path,
                                        total_items=paths,
                                        start_time=start_time)
        # Common prefix (directory + source file name) of all outputs.
        out_path = out_dir + prep_utils.get_filename(path)

        if mode == "RGB":
            image = cv2.imread(path)
            image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            S_recovered = np.array(image_gray, dtype=np.float32)
            # cv2.resize takes the target size as (width, height).
            S_recovered = cv2.resize(S_recovered, (resize_w, resize_h),
                                     interpolation=cv2.INTER_CUBIC)

            # Min-max rescale to [-1, 1].
            # NOTE: a constant image makes the range zero and yields NaNs.
            lowest = np.min(S_recovered)
            highest = np.max(S_recovered)
            S = (S_recovered - lowest) / (highest - lowest) * 2 - 1
            pd.DataFrame(S).to_csv(out_path + "_norm.csv",
                                   header=None,
                                   index=False)
            plt.imsave(out_path + "_norm.png", S)

            S = ar_utils.unnormalize_stft(s=S)
            pd.DataFrame(S).to_csv(out_path + "_reconstruct.csv",
                                   header=None,
                                   index=False)
            plt.imsave(out_path + "_reconstruct.png", S)

            y = librosa.griffinlim(S)
            _write_wav_and_denoised(y, out_path + ".wav",
                                    out_path + "_nr.wav")
        else:  # mode == "grayscale"
            S = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            S_recovered = np.array(S, dtype=np.float32)
            pd.DataFrame(S_recovered).to_csv(out_path + "_original.csv",
                                             header=None,
                                             index=False)
            cv2.imwrite(out_path + "_original.png", S_recovered)

            # cv2.resize takes the target size as (width, height).
            S_recovered = cv2.resize(S_recovered, (resize_w, resize_h),
                                     interpolation=cv2.INTER_CUBIC)
            S_recovered = ar_utils.decrease_brightness(S_recovered)

            pd.DataFrame(S_recovered).to_csv(out_path + "_recovered.csv",
                                             header=None,
                                             index=False)
            cv2.imwrite(out_path + "_recovered.png", S_recovered)

            y = librosa.griffinlim(S_recovered)
            _write_wav_and_denoised(y, out_path + "_recovered.wav",
                                    out_path + "_nr.wav")