Exemplos de wav_read_center em Python, exemplos de generate_samples.wav_read_center em Python

Exemplo n.º 1

0

Exibir arquivo

    def createroom(mic_p, mic_d, sour_p, sour_d, callback_mix, roomdim,
                   absorption, max_order, n_mics, angle):
        """Simulate a two-source shoebox room, separate the mix and report SDR/SIR.

        The routine builds a ``pra.ShoeBox`` room, places two sources at fixed
        positions and a microphone array produced by ``semi_circle_layout``,
        simulates the recording, runs the separation algorithm selected by
        ``algo_choices[0]`` on the STFT of the microphone signals, evaluates
        the result with ``bss_eval_sources`` and, because ``save`` is hard-coded
        to ``True``, writes ``birdmix.wav`` and ``birdsep<i>.wav``.

        Parameters
        ----------
        mic_p, mic_d
            Forwarded to ``semi_circle_layout`` as its first and third
            positional arguments (presumably array centre and radius --
            TODO confirm against ``semi_circle_layout``).
        sour_p, sour_d
            Not used anywhere in this body.
        callback_mix
            Mixing callback handed to ``room.simulate``.
        roomdim
            Room dimensions handed to ``pra.ShoeBox``.
        absorption, max_order
            Forwarded unchanged to ``pra.ShoeBox``.
        n_mics
            Number of microphones to lay out and to keep from the STFT.
        angle
            Second positional argument of ``semi_circle_layout``; also used to
            derive the layout rotation ``pi - angle / 2``.

        Returns
        -------
        None
            Results are only printed and written to disk.

        Notes
        -----
        Side effects: reseeds the global NumPy RNG with 10, prints to stdout
        and writes wav files in the current working directory.

        The body relies on names that are not defined in this block and must
        come from the enclosing scope: ``np``, ``pra``, ``time``,
        ``algo_choices``, ``init``, ``init_choices``, ``amBird``, ``saBird``,
        ``semi_circle_layout``, ``random_layout``, ``wav_read_center``,
        ``overiva``, ``auxiva_pca``, ``ogive``, ``ogive_matlab_wrapper`` and
        ``bss_eval_sources``.
        """
        np.random.seed(10)
        # STFT parameters: 4096-sample frames with a hop of half a frame
        framesize = 4096
        win_a = pra.hann(framesize)
        win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2)
        # algorithm parameters
        # OGIVE parameters (only read by the "ogive" / "ogive_matlab" branches)
        ogive_mu = 0.1
        ogive_update = "switching"
        ogive_iter = 2000
        SIR = 10  # dB
        SNR = (
            60
        )  # dB, this is the SNR with respect to a single target source and microphone self-noise

        # ---- separation parameters ----
        algo = algo_choices[0]
        no_cb = True  # not read again in this body
        save = True
        n_iter = 60
        dist = "gauss"  # source model: "gauss" or "laplace"
        # ---- scene parameters ----
        fs = 44100  # not read again; the room below hard-codes fs=44100
        n_sources = 2
        n_mics = n_mics  # no-op self assignment
        n_sources_target = 2
        assert n_sources_target <= n_mics, "More sources than microphones is not supported"

        # source standard deviations handed to the mixing callback (all ones)
        source_std = np.ones(n_sources_target)
        # room size
        room_dim = roomdim
        # microphone positions
        rot = angle
        offset = np.pi - rot / 2
        mic_locs = semi_circle_layout(mic_p, rot, mic_d, n_mics,
                                      rot=offset)

        # target positions: one column per source, (7, 10, 6) and (9, 16, 6)
        target_locs = np.transpose([[7, 10, 6], [9, 16, 6]])
        # interferer positions: n_sources - n_sources_target is 0 here and the
        # result is never used below (source_locs only holds the targets)
        interferer_locs = random_layout([14, 0, 6],
                                        n_sources - n_sources_target,
                                        offset=[5, 20, 3],
                                        seed=1)
        source_locs = target_locs
        # load the audio; amBird / saBird are not parameters of this function
        # and must be provided by the enclosing scope
        wav_files = [amBird, saBird]
        signals = wav_read_center(wav_files, seed=123)

        # create the room
        room = pra.ShoeBox(room_dim,
                           fs=44100,
                           absorption=absorption,
                           max_order=max_order,
                           air_absorption=True,
                           humidity=50)

        # add one source per signal / location column
        for sig, loc in zip(signals, source_locs.T):
            room.add_source(loc, signal=sig)

        # add the microphone array
        room.add_microphone_array(pra.MicrophoneArray(mic_locs, fs=room.fs))

        callback_mix_kwargs = {
            "snr": SNR,
            "sir": SIR,
            "n_src": n_sources,
            "n_tgt": n_sources_target,
            "src_std": source_std,
            "ref_mic": 0,
        }

        # Run the simulation; return_premix=True also returns the per-source
        # recordings that serve as evaluation references below
        separate_recordings = room.simulate(
            callback_mix=callback_mix,
            callback_mix_kwargs=callback_mix_kwargs,
            return_premix=True,
        )
        mics_signals = room.mic_array.signals
        print("Simulation done.")

        # rt60 = room.measure_rt60()
        # print(rt60)

        # Build the reference array: swap axes 1 and 2 of the premix and, when
        # there are fewer references than microphones, pad with random noise
        ref = np.moveaxis(separate_recordings, 1, 2)
        if ref.shape[0] < n_mics:
            ref = np.concatenate(
                (ref,
                 np.random.randn(n_mics - ref.shape[0], ref.shape[1],
                                 ref.shape[2])),
                axis=0,
            )

        # NOTE(review): this rebinds SIR (10 dB above) to an empty list; none
        # of the three lists is read again in this body
        SDR, SIR, cost_func = [], [], []
        convergence_callback = None

        # START BSS

        # shape: (n_frames, n_freq, n_mics)
        X_all = pra.transform.analysis(mics_signals.T,
                                       framesize,
                                       framesize // 2,
                                       win=win_a).astype(np.complex128)
        X_mics = X_all[:, :, :n_mics]

        tic = time.perf_counter()  # tic / toc are measured but never reported

        # Run BSS
        if algo == "auxiva":
            # AuxIVA, implemented by calling overiva without n_src
            Y = overiva(
                X_mics,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                callback=convergence_callback,
            )
        elif algo == "auxiva_pca":
            # AuxIVA with PCA
            Y = auxiva_pca(
                X_mics,
                n_src=n_sources_target,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                callback=convergence_callback,
            )
        elif algo == "overiva":
            # OverIVA; `init` is not defined in this block (outer scope)
            Y = overiva(
                X_mics,
                n_src=n_sources_target,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        elif algo == "ilrma":
            # ILRMA from pyroomacoustics
            Y = pra.bss.ilrma(
                X_mics,
                n_iter=n_iter,
                n_components=2,
                proj_back=True,
                callback=convergence_callback,
            )
        elif algo == "ogive":
            # OGIVE
            Y = ogive(
                X_mics,
                n_iter=ogive_iter,
                step_size=ogive_mu,
                update=ogive_update,
                proj_back=True,
                model=dist,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        elif algo == "ogive_matlab":
            # OGIVE through the MATLAB wrapper
            Y = ogive_matlab_wrapper(
                X_mics,
                n_iter=ogive_iter,
                step_size=ogive_mu,
                update=ogive_update,
                proj_back=True,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        else:
            raise ValueError("No such algorithm {}".format(algo))

        toc = time.perf_counter()

        # Run iSTFT; a single-output result is synthesised as 1-D and then
        # given back a trailing channel axis
        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0],
                                        framesize,
                                        framesize // 2,
                                        win=win_s)[:, None]
            y = y.astype(np.float64)
        else:
            y = pra.transform.synthesis(Y,
                                        framesize,
                                        framesize // 2,
                                        win=win_s).astype(np.float64)

        # If some of the output are uniformly zero, just add a bit of noise to compare
        for k in range(y.shape[1]):
            if np.sum(np.abs(y[:, k])) < 1e-10:
                y[:, k] = np.random.randn(y.shape[0]) * 1e-10

        # For conventional methods of BSS, reorder the signals by decreasing power
        if algo != "blinkiva":
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        # Compare against the references at microphone 0; the estimate is
        # shifted by one hop (framesize // 2) before comparison
        m = np.minimum(y.shape[0] - framesize // 2, ref.shape[1])
        sdr, sir, sar, perm = bss_eval_sources(
            ref[:n_sources_target, :m, 0],
            y[framesize // 2:m + framesize // 2, :n_sources_target].T,
        )

        # reorder the vector of reconstructed signals
        y_hat = y[:, perm]
        print("SDR:", sdr)
        print("SIR:", sir)

        # ---- save the mix (first microphone) and the separated signals ----
        if save:
            from scipy.io import wavfile
            wavfile.write(
                "birdmix.wav",
                room.fs,
                (pra.normalize(mics_signals, bits=16).astype(np.int16).T)[:,
                                                                          0],
            )
            for i, sig in enumerate(y_hat.T):
                wavfile.write(
                    "birdsep{}.wav".format(i + 1),
                    room.fs,
                    pra.normalize(sig, bits=16).astype(np.int16).T,
                )

Exemplo n.º 2

0

Exibir arquivo

Arquivo: mbss_oneshot.py Projeto: onolab-tmu/blinky-iva

                                     n_sources_target,
                                     rot=0.743 * np.pi)
    # interferer_locs = grid_layout([3., 5.5], n_sources - n_sources_target, offset=[6.5, 1., 1.7])
    interferer_locs = random_layout([3.0, 5.5, 1.5],
                                    n_sources - n_sources_target,
                                    offset=[6.5, 1.0, 0.5],
                                    seed=1)
    source_locs = np.concatenate((target_locs, interferer_locs), axis=1)

    # Prepare the signals
    wav_files = sampling(1,
                         n_sources,
                         f"{samples_dir}/metadata.json",
                         gender_balanced=True,
                         seed=8)[0]
    signals = wav_read_center(wav_files, seed=123)

    # Place the blinkies regularly in the room (2D plane)
    blinky_locs = gm_layout(n_blinkies,
                            target_locs - np.c_[[0.0, 0.0, 0.4]],
                            std=[0.4, 0.4, 0.05],
                            seed=987)

    all_locs = np.concatenate((mic_locs, blinky_locs), axis=1)

    # Create the room itself
    room = pra.ShoeBox(room_dim,
                       fs=fs,
                       absorption=absorption,
                       max_order=max_order)

Exemplo n.º 3

0

Exibir arquivo

Arquivo: paper_simulation.py Projeto: zhuleiustc1983/code_2020ICASSP_five

def one_loop(args):
    """Run every configured separation algorithm on one simulated mixture.

    A random room is built from the given wav files, the sources are mixed at
    the requested SINR, and each algorithm listed in
    ``parameters["algorithm_kwargs"]`` is run on the STFT of the mixture while
    SDR/SIR are tracked through a convergence callback.

    Parameters
    ----------
    args : tuple
        ``(n_targets, n_interferers, n_mics, sinr, wav_files, room_seed,
        seed)``. ``wav_files`` are paths relative to
        ``parameters["base_dir"]``; only the first
        ``n_targets + n_interferers`` are read. ``room_seed`` is forwarded to
        ``random_room_builder`` and ``seed`` seeds the global NumPy RNG for
        the duration of the call.

    Returns
    -------
    list of dict
        One entry per algorithm that was attempted, with keys ``algorithm``,
        ``n_targets``, ``n_interferers``, ``n_mics``, ``rt60``, ``sinr``,
        ``seed``, ``sdr``, ``sir``, ``runtime``, ``eval_time`` and
        ``n_samples``. An empty list is returned for the underdetermined case
        ``n_mics < n_targets``.

    Notes
    -----
    Reads the module-level ``parameters`` dict (keys used here: ``base_dir``,
    ``stft_params``, ``room_params``, ``sinr_diffuse_ratio``, ``ref_mic``,
    ``overdet_algos``, ``algorithm_kwargs``, ``_results_dir``).

    When an algorithm raises, ``nan`` is appended to its ``sdr``/``sir``
    lists, the partial result is appended to
    ``<_results_dir>/error_<pid>.json`` and the loop moves on to the next
    algorithm. ``KeyboardInterrupt`` and ``SystemExit`` are not intercepted.
    """
    global parameters

    # All imports are local to the function and bound to explicit names, as in
    # the rest of this file.
    import json
    import os
    import sys
    import time

    import numpy

    np = numpy

    import pyroomacoustics

    pra = pyroomacoustics

    sys.path.append(parameters["base_dir"])

    from auxiva_pca import auxiva_pca, pca_separation
    from five import five
    from ive import ogive
    from overiva import overiva
    from pyroomacoustics.bss.common import projection_back
    from room_builder import callback_noise_mixer, random_room_builder

    # import samples helper routine
    from get_data import samples_dir

    sys.path.append(os.path.join(parameters["base_dir"], samples_dir))
    from generate_samples import wav_read_center

    n_targets, n_interferers, n_mics, sinr, wav_files, room_seed, seed = args

    # this is the underdetermined case. We don't do that.
    if n_mics < n_targets:
        return []

    # set MKL to only use one thread if present
    try:
        import mkl

        mkl.set_num_threads(1)
    except ImportError:
        pass

    # set the RNG seed
    rng_state = np.random.get_state()
    np.random.seed(seed)

    # STFT parameters
    framesize = parameters["stft_params"]["framesize"]
    hop = parameters["stft_params"]["hop"]
    # Hamming only when explicitly requested; every other value, including
    # "hann", selects the Hann window.
    if parameters["stft_params"]["window"] == "hamming":
        win_a = pra.hamming(framesize)
    else:  # default is Hann
        win_a = pra.hann(framesize)
    win_s = pra.transform.compute_synthesis_window(win_a, hop)

    # Generate the audio signals

    # target source variances (all ones)
    sources_var = np.ones(n_targets)

    # total number of sources
    n_sources = n_targets + n_interferers

    # Read the signals
    wav_files = [os.path.join(parameters["base_dir"], fn) for fn in wav_files]
    signals = wav_read_center(wav_files[:n_sources], seed=123)

    # Get a random room
    room, rt60 = random_room_builder(signals,
                                     n_mics,
                                     seed=room_seed,
                                     **parameters["room_params"])
    premix = room.simulate(return_premix=True)

    # mix the signal
    n_samples = premix.shape[2]
    mix = callback_noise_mixer(
        premix,
        sinr=sinr,
        diffuse_ratio=parameters["sinr_diffuse_ratio"],
        n_src=n_sources,
        n_tgt=n_targets,
        tgt_std=np.sqrt(sources_var),
        ref_mic=parameters["ref_mic"],
    )

    # sum up the background
    # shape (n_mics, n_samples)
    background = np.sum(premix[n_targets:n_sources, :, :], axis=0)

    # shape (n_targets+1, n_samples, n_mics): the targets followed by the
    # summed background, used as evaluation references
    ref = np.zeros((n_targets + 1, premix.shape[2], premix.shape[1]),
                   dtype=premix.dtype)
    ref[:n_targets, :, :] = premix[:n_targets, :, :].swapaxes(1, 2)
    ref[n_targets, :, :] = background.T

    # scratch buffer reused by every evaluation
    synth = np.zeros_like(ref)

    # START BSS
    ###########

    # shape: (n_frames, n_freq, n_mics)
    X_all = pra.transform.analysis(mix.T, framesize, hop, win=win_a)
    X_mics = X_all[:, :, :n_mics]

    # imported once here instead of on every callback invocation
    from mir_eval.separation import bss_eval_sources

    # convergence monitoring callback
    def convergence_callback(Y, X, n_targets, SDR, SIR, eval_time, ref,
                             framesize, win_s, algo_name):
        """Evaluate the current estimate ``Y`` and append SDR/SIR/eval time."""
        t_in = time.perf_counter()

        # projection back onto the first channel of X
        z = projection_back(Y, X[:, :, 0])
        Y = Y * np.conj(z[None, :, :])

        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0], framesize, hop,
                                        win=win_s)[:, None]
        else:
            y = pra.transform.synthesis(Y, framesize, hop, win=win_s)

        # determined algorithms: order outputs by decreasing power
        if algo_name not in parameters["overdet_algos"]:
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        m = np.minimum(y.shape[0] - hop, ref.shape[1])

        # the estimate is shifted by one hop; the extra "background" slot is
        # filled with the first output so the shapes match the references
        synth[:n_targets, :m, 0] = y[hop:m + hop, :n_targets].T
        synth[n_targets, :m, 0] = y[hop:m + hop, 0]

        sdr, sir, sar, perm = bss_eval_sources(ref[:n_targets + 1, :m, 0],
                                               synth[:, :m, 0])
        SDR.append(sdr[:n_targets].tolist())
        SIR.append(sir[:n_targets].tolist())

        t_out = time.perf_counter()
        eval_time.append(t_out - t_in)

    # store results in a list, one entry per algorithm
    results = []

    # compute the initial values of SDR/SIR (unprocessed microphone signals)
    init_sdr = []
    init_sir = []

    convergence_callback(X_mics, X_mics, n_targets, init_sdr, init_sir, [],
                         ref, framesize, win_s, "init")

    for full_name, params in parameters["algorithm_kwargs"].items():

        name = params["algo"]
        kwargs = params["kwargs"]

        if name == "auxiva_pca" and n_targets == 1:
            # PCA doesn't work for single source scenario
            continue
        elif name in ["ogive", "five"] and n_targets != 1:
            # OGIVE and FIVE are only for a single target
            continue

        results.append({
            "algorithm": full_name,
            "n_targets": n_targets,
            "n_interferers": n_interferers,
            "n_mics": n_mics,
            "rt60": rt60,
            "sinr": sinr,
            "seed": seed,
            "sdr": [],
            "sir": [],  # to store the result
            "runtime": np.nan,
            "eval_time": np.nan,
            "n_samples": n_samples,
        })

        # this is used to keep track of time spent in the evaluation callback
        eval_time = []

        def cb(Y):
            convergence_callback(
                Y,
                X_mics,
                n_targets,
                results[-1]["sdr"],
                results[-1]["sir"],
                eval_time,
                ref,
                framesize,
                win_s,
                name,
            )

        # avoid one computation by using the initial values of sdr/sir
        results[-1]["sdr"].append(init_sdr[0])
        results[-1]["sir"].append(init_sir[0])

        try:
            t_start = time.perf_counter()

            if name == "auxiva":
                # Run AuxIVA
                # this calls full IVA when `n_src` is not provided
                Y = overiva(X_mics, callback=cb, **kwargs)

            elif name == "auxiva_pca":
                # Run AuxIVA with PCA
                Y = auxiva_pca(X_mics,
                               n_src=n_targets,
                               callback=cb,
                               proj_back=False,
                               **kwargs)

            elif name in ("overiva", "overiva2"):
                # Run OverIVA; both names map to the same call and differ
                # only through their configured kwargs
                Y = overiva(X_mics,
                            n_src=n_targets,
                            callback=cb,
                            proj_back=False,
                            **kwargs)

            elif name == "five":
                # Run FIVE
                Y = five(X_mics, callback=cb, proj_back=False, **kwargs)

            elif name == "ilrma":
                # Run ILRMA
                Y = pra.bss.ilrma(X_mics,
                                  callback=cb,
                                  proj_back=False,
                                  **kwargs)

            elif name == "ogive":
                # Run OGIVE
                Y = ogive(X_mics, callback=cb, proj_back=False, **kwargs)

            elif name == "pca":
                # Run PCA
                Y = pca_separation(X_mics, n_src=n_targets)

            else:
                # unknown algorithm: keep the placeholder entry and move on
                continue

            t_finish = time.perf_counter()

            # The last evaluation (its duration is deliberately not counted)
            convergence_callback(
                Y,
                X_mics,
                n_targets,
                results[-1]["sdr"],
                results[-1]["sir"],
                [],
                ref,
                framesize,
                win_s,
                name,
            )

            results[-1]["eval_time"] = np.sum(eval_time)
            results[-1][
                "runtime"] = t_finish - t_start - results[-1]["eval_time"]

        except Exception:
            # Best effort: a failing algorithm must not abort the whole
            # simulation, but interpreter-exit signals still propagate.
            pid = os.getpid()
            # report last sdr/sir as np.nan
            results[-1]["sdr"].append(np.nan)
            results[-1]["sir"].append(np.nan)
            # now write the problem to file
            fn_err = os.path.join(parameters["_results_dir"],
                                  "error_{}.json".format(pid))
            with open(fn_err, "a") as f:
                f.write(json.dumps(results[-1], indent=4))
            # skip to next iteration
            continue

    # restore RNG former state
    np.random.set_state(rng_state)

    return results

Exemplo n.º 4

0

Exibir arquivo

    def createroom(amBird, saBird, noises, mic_p, mic_d, sour_p, sour_d, callback_mix, roomdim, absorption, max_order, n_mics, angle):
        """Simulate two targets plus three interferers, separate them and log SDR/SIR.

        Five wav files (``amBird``, ``saBird`` and ``noises[0..2]``) are placed
        in a ``pra.ShoeBox`` room. The per-source recordings are normalised at
        the reference microphone, the interferers are scaled to reach the
        hard-coded SINR, and the resulting mix is separated with the algorithm
        selected by ``algo_choices[0]``. SDR and SIR of the two targets are
        appended to ``SDRList`` / ``SIRList`` and printed, and wav files are
        written because ``save`` is hard-coded to ``True``.

        Parameters
        ----------
        amBird, saBird
            The two target recordings, passed to ``wav_read_center``.
        noises
            Indexable with at least three entries; ``noises[0]``..``noises[2]``
            are the interferer recordings.
        mic_p, mic_d
            Forwarded to ``semi_circle_layout`` as its first and third
            positional arguments (presumably array centre and radius --
            TODO confirm against ``semi_circle_layout``).
        sour_p, sour_d, callback_mix
            Not used anywhere in this body.
        roomdim
            Room dimensions handed to ``pra.ShoeBox``.
        absorption, max_order
            Forwarded unchanged to ``pra.ShoeBox``.
        n_mics
            Number of microphones to lay out and to keep from the STFT.
        angle
            Second positional argument of ``semi_circle_layout``; also used to
            derive the layout rotation ``pi - angle / 2``.

        Returns
        -------
        None
            Results are appended to ``SDRList`` / ``SIRList``, printed and
            written to disk.

        Notes
        -----
        Side effects: reseeds the global NumPy RNG with 10, prints to stdout
        and writes ``birdmix.wav`` / ``birdsep<i>.wav`` in the current working
        directory.

        The body relies on names that are not defined in this block and must
        come from the enclosing scope: ``np``, ``pra``, ``algo_choices``,
        ``init``, ``init_choices``, ``SDRList``, ``SIRList``,
        ``semi_circle_layout``, ``random_layout``, ``wav_read_center``,
        ``overiva``, ``auxiva_pca``, ``ogive``, ``ogive_matlab_wrapper`` and
        ``bss_eval_sources``.
        """
        # ---- IVA parameter setup ----
        np.random.seed(10)
        # STFT parameters: 4096-sample frames with a hop of half a frame
        framesize = 4096
        win_a = pra.hann(framesize)
        win_s = pra.transform.compute_synthesis_window(
            win_a, framesize // 2)
        # algorithm parameters
        # OGIVE parameters (only read by the "ogive" / "ogive_matlab" branches)
        ogive_mu = 0.1
        ogive_update = "switching"
        ogive_iter = 2000

        # ---- separation parameters ----
        algo = algo_choices[0]
        no_cb = True  # not read again in this body
        save = True
        n_iter = 60
        dist = "gauss"  # source model: "gauss" or "laplace"
        # ---- scene parameters ----
        fs = 44100  # not read again; the room below hard-codes fs=44100
        snr = 60  # dB, SNR with respect to a single target source and microphone self-noise
        sinr = 10  # dB
        n_sources = 2+3  # two targets plus three interferers
        n_mics = n_mics  # no-op self assignment
        n_sources_target = 2
        assert n_sources_target <= n_mics, "More sources than microphones is not supported"

        # vector of ones; not read again in this body
        source_std = np.ones(n_sources_target)
        # ---- positions ----
        # room size
        room_dim = roomdim
        # microphone positions
        rot = angle
        offset = np.pi-rot/2
        mic_locs = semi_circle_layout(
            mic_p, rot, mic_d, n_mics, rot=offset)

        # target positions: one column per source, (7, 10, 6) and (9, 16, 6)
        target_locs = np.transpose([[7, 10, 6], [9, 16, 6]])

        # interferer positions (three of them)
        interferer_locs = random_layout([16, 2, 6], 3, offset=[5, 18, 3], seed=1)
        source_locs = np.concatenate((target_locs, interferer_locs), axis=1)
        # source_locs = target_locs
        # audio input: targets first, then the interferers
        wav_files = [amBird, saBird, noises[0],noises[1],noises[2]]
        signals = wav_read_center(wav_files, seed=123)

        # create the room
        room = pra.ShoeBox(room_dim, fs=44100, absorption=absorption,
                           max_order=max_order, air_absorption=True, humidity=50)

        # add one source per signal / location column
        for sig, loc in zip(signals, source_locs.T):
            room.add_source(loc, signal=sig)

        # add the microphone array
        room.add_microphone_array(
            pra.MicrophoneArray(mic_locs, fs=room.fs))

        # Disabled debugging aid: scatter plot of microphone, source and
        # interferer positions, and the kwargs of the unused mixing callback.
        # x = mic_locs[:2][0]
        # y = mic_locs[:2][1]
        # import matplotlib.pyplot as plt
        # plt.scatter(x,y)
        # plt.axis('equal')
        # plt.xlim([0,20])
        # plt.ylim([0,20])
        # x1 = source_locs[:2][0]
        # y1 = source_locs[:2][1]
        # plt.scatter(x1,y1)
        # plt.xlim([0,20])
        # plt.ylim([0,20])
        # plt.axis('equal')
        # x1 = interferer_locs[:2][0]
        # y1 = interferer_locs[:2][1]
        # plt.scatter(x1,y1)
        # plt.xlim([0,20])
        # plt.ylim([0,20])
        # plt.axis('equal')
        # plt.show()
        # callback_mix_kwargs = {
        #     "snr": SNR,
        #     "sir": SIR,
        #     "n_src": n_sources,
        #     "n_tgt": n_sources_target,
        #     "src_std": source_std,
        #     "ref_mic": 0,
        # }

        # ---- power scaling ----
        # premix is indexed (source, microphone, sample), as implied by the
        # ref_mic indexing below
        premix = room.simulate(return_premix=True)
        n_samples = premix.shape[2]  # not read again in this body
        # Normalize the signals so that they all have unit variance at the reference microphone
        ref_mic=0
        p_mic_ref = np.std(premix[:, ref_mic, :], axis=1)
        premix /= p_mic_ref[:, None, None]
        sources_var = np.ones(n_sources_target)  # target variances, all ones
        # scale to pre-defined variance
        premix[:n_sources_target, :, :] *= np.sqrt(sources_var[:, None, None])

        # compute noise variance
        # NOTE(review): sigma_n only enters the sigma_i formula below; no
        # noise is added to `mix` in this body -- confirm this is intended
        sigma_n = np.sqrt(10 ** (-snr / 10) * np.sum(sources_var))

        # now compute the power of interference signal needed to achieve desired SINR
        sigma_i = np.sqrt(
            np.maximum(0, 10 ** (-sinr / 10) * np.sum(sources_var) - sigma_n ** 2)
            / (n_sources-n_sources_target)
        )
        premix[n_sources_target:, :, :] *= sigma_i

        # sum of the scaled interferers
        background = (
            np.sum(premix[n_sources_target:, :, :], axis=0)
        )

        # Mix down the recorded signals
        mix = np.sum(premix, axis=0)
        # NOTE(review): mics_signals is what the simulator stored, not the
        # rescaled `mix` that is separated below; it is only used for
        # birdmix.wav -- confirm that saving this one is intended
        mics_signals = room.mic_array.signals

        print("Simulation done.")

        # Evaluation references: the targets followed by the summed background
        ref = np.zeros(
        (n_sources_target+1, premix.shape[2], premix.shape[1]), dtype=premix.dtype)
        ref[:n_sources_target, :, :] = premix[:n_sources_target, :, :].swapaxes(1, 2)
        ref[n_sources_target, :, :] = background.T
        convergence_callback = None

        # START BSS

        X_all = pra.transform.analysis(
            mix.T, framesize, framesize // 2, win=win_a
        ).astype(np.complex128)
        X_mics = X_all[:, :, :n_mics]


        # Run BSS
        if algo == "auxiva":
            # AuxIVA, implemented by calling overiva without n_src
            Y = overiva(
                X_mics,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                callback=convergence_callback,
            )
        elif algo == "auxiva_pca":
            # AuxIVA with PCA
            Y = auxiva_pca(
                X_mics,
                n_src=n_sources_target,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                callback=convergence_callback,
            )
        elif algo == "overiva":
            # OverIVA; `init` is not defined in this block (outer scope)
            Y = overiva(
                X_mics,
                n_src=n_sources_target,
                n_iter=n_iter,
                proj_back=True,
                model=dist,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        elif algo == "ilrma":
            # ILRMA from pyroomacoustics
            Y = pra.bss.ilrma(
                X_mics,
                n_iter=n_iter,
                n_components=2,
                proj_back=True,
                callback=convergence_callback,
            )
        elif algo == "ogive":
            # OGIVE
            Y = ogive(
                X_mics,
                n_iter=ogive_iter,
                step_size=ogive_mu,
                update=ogive_update,
                proj_back=True,
                model=dist,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        elif algo == "ogive_matlab":
            # OGIVE through the MATLAB wrapper
            Y = ogive_matlab_wrapper(
                X_mics,
                n_iter=ogive_iter,
                step_size=ogive_mu,
                update=ogive_update,
                proj_back=True,
                init_eig=(init == init_choices[1]),
                callback=convergence_callback,
            )
        else:
            raise ValueError("No such algorithm {}".format(algo))


        # Run iSTFT; a single-output result is synthesised as 1-D and then
        # given back a trailing channel axis
        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0], framesize, framesize // 2, win=win_s)[
                :, None
            ]
            y = y.astype(np.float64)
        else:
            y = pra.transform.synthesis(Y, framesize, framesize // 2, win=win_s).astype(
                np.float64
            )

        # If some of the output are uniformly zero, just add a bit of noise to compare
        for k in range(y.shape[1]):
            if np.sum(np.abs(y[:, k])) < 1e-10:
                y[:, k] = np.random.randn(y.shape[0]) * 1e-10

        # For conventional methods of BSS, reorder the signals by decreasing power
        if algo != "blinkiva":
            new_ord = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, new_ord]

        # Compare against the target references at microphone 0; the estimate
        # is shifted by one hop (framesize // 2) before comparison
        m = np.minimum(y.shape[0] - framesize // 2, ref.shape[1])
        sdr, sir, sar, perm = bss_eval_sources(
            ref[:n_sources_target, :m, 0],
            y[framesize // 2: m + framesize // 2, :n_sources_target].T,
        )

        # reorder the vector of reconstructed signals
        y_hat = y[:, perm]
        # SDRList / SIRList are accumulators owned by the enclosing scope
        SDRList.append(sdr)
        SIRList.append(sir)
        print("SDR:", sdr)
        print("SIR:", sir)

        # ---- save the mix (first microphone) and the separated signals ----
        if save:
            from scipy.io import wavfile
            wavfile.write(
                "birdmix.wav",
                room.fs,
                (pra.normalize(mics_signals, bits=16).astype(
                    np.int16).T)[:, 0],
            )
            for i, sig in enumerate(y_hat.T):
                wavfile.write(
                    "birdsep{}.wav".format(i + 1),
                    room.fs,
                    pra.normalize(sig, bits=16).astype(np.int16).T,
                )

Exemplo n.º 5

0

Exibir arquivo

Arquivo: room_builder.py Projeto: wjliu0215/bss_speech_dataset

def random_room_builder(
    wav_files: List[str],
    n_mics: int,
    mic_delta: Optional[float] = None,
    fs: float = 16000,
    t60_interval: Tuple[float, float] = (0.150, 0.500),
    room_width_interval: Tuple[float, float] = (6, 10),
    room_height_interval: Tuple[float, float] = (2.8, 4.5),
    source_zone_height: Tuple[float, float] = (1.0, 2.0),
    guard_zone_width: float = 0.5,
    seed: Optional[int] = None,
):
    """
    This function creates a random room within some parameters.

    The microphone array is circular with the distance between neighboring
    elements set to the maximal distance avoiding spatial aliasing.

    Parameters
    ----------
    wav_files: list of str
        The audio files, one per source; they are loaded with
        ``wav_read_center`` and also recorded in the returned parameters
    n_mics: int
        The number of microphones in the microphone array
    mic_delta: float, optional
        The distance between neighboring microphones in the array; defaults
        to half the wavelength at ``fs / 2``
    fs: float, optional
        The sampling frequency for the simulation
    t60_interval: (float, float), optional
        An interval where to pick the reverberation time
    room_width_interval: (float, float), optional
        An interval where to pick the room horizontal length/width
    room_height_interval: (float, float), optional
        An interval where to pick the room vertical length
    source_zone_height: (float, float), optional
        The vertical interval where sources and microphones are allowed
    guard_zone_width: float
        The minimum distance between a vertical wall and a source/microphone
    seed: int, optional
        When given, the global numpy RNG is seeded with this value for the
        duration of the call and restored afterwards, also when an exception
        is raised

    Returns
    -------
    ShoeBox object
        A randomly generated room according to the provided parameters, with
        microphones and sources added and the impulse responses computed
    dict
        The parameters of the room: ``shoebox_params`` (``room_dim``, ``fs``,
        ``absorption``, ``max_order``), ``mics``, ``sources`` (``locs`` and
        ``signals``) and ``t60``, the mean T60 measured over all the
        simulated impulse responses
    """

    def random_location(
        room_dim, n, ref_point=None, min_distance=None, max_distance=None
    ):
        """ Helper function to pick a location in the room """

        width = room_dim[0] - 2 * guard_zone_width
        width_intercept = guard_zone_width

        depth = room_dim[1] - 2 * guard_zone_width
        depth_intercept = guard_zone_width

        height = np.diff(source_zone_height)[0]
        height_intercept = source_zone_height[0]

        locs = rand(3, n)
        locs[0, :] = locs[0, :] * width + width_intercept
        locs[1, :] = locs[1, :] * depth + depth_intercept
        locs[2, :] = locs[2, :] * height + height_intercept

        if ref_point is not None:
            # Check condition
            d = np.linalg.norm(locs - ref_point, axis=0)

            if min_distance is not None and max_distance is not None:
                redo = np.where(np.logical_or(d < min_distance, max_distance < d))[0]
            elif min_distance is not None:
                redo = np.where(d < min_distance)[0]
            elif max_distance is not None:
                redo = np.where(d > max_distance)[0]
            else:
                redo = []

            # Recursively call this function on sources to redraw
            if len(redo) > 0:
                locs[:, redo] = random_location(
                    room_dim,
                    len(redo),
                    ref_point=ref_point,
                    min_distance=min_distance,
                    max_distance=max_distance,
                )

        return locs

    # save current numpy RNG state and set a known seed
    rng_state = None
    if seed is not None:
        rng_state = np.random.get_state()
        np.random.seed(seed)

    try:
        n_sources = len(wav_files)

        # sanity checks
        assert source_zone_height[0] > 0
        assert source_zone_height[1] < room_height_interval[0]
        assert source_zone_height[0] <= source_zone_height[1]
        assert 2 * guard_zone_width < room_width_interval[1] - room_width_interval[0]

        c = pra.constants.get("c")

        # Create the room
        # Sometimes the room dimension and required T60 are not compatible, then
        # we just try again with new random values
        retry = True
        while retry:
            try:
                room_dim = np.array(
                    [
                        rand() * np.diff(room_width_interval)[0] + room_width_interval[0],
                        rand() * np.diff(room_width_interval)[0] + room_width_interval[0],
                        rand() * np.diff(room_height_interval)[0] + room_height_interval[0],
                    ]
                )
                t60 = rand() * np.diff(t60_interval)[0] + t60_interval[0]
                reflection, max_order = inv_sabine(t60, room_dim, c)
                retry = False
            except ValueError:
                pass
        # Create the room based on the random parameters
        room = pra.ShoeBox(
            room_dim, fs=fs, absorption=1 - reflection, max_order=max_order
        )

        # The critical distance
        # https://en.wikipedia.org/wiki/Critical_distance
        d_critical = 0.057 * np.sqrt(np.prod(room_dim) / t60)

        # default inter-mic distance is set according to nyquist criterion
        # i.e. 1/2 wavelength corresponding to fs / 2 at given speed of sound
        if mic_delta is None:
            mic_delta = 0.5 * (c / (0.5 * fs))

        # the microphone array is uniformly circular with the distance between
        # neighboring elements of mic_delta
        mic_center = random_location(room_dim, 1)
        mic_rotation = rand() * 2 * np.pi
        mic_radius = 0.5 * mic_delta / np.sin(np.pi / n_mics)
        mic_array = pra.MicrophoneArray(
            np.vstack(
                (
                    pra.circular_2D_array(
                        mic_center[:2, 0], n_mics, mic_rotation, mic_radius
                    ),
                    mic_center[2, 0] * np.ones(n_mics),
                )
            ),
            room.fs,
        )
        room.add_microphone_array(mic_array)

        # Now we will get the sources at random: every source is placed
        # between the critical distance and one meter beyond it, measured
        # from the center of the microphone array
        source_locs = random_location(
            room_dim,
            n_sources,
            ref_point=mic_center,
            min_distance=d_critical,
            max_distance=d_critical + 1,
        )

        source_signals = wav_read_center(wav_files, seed=123)

        for s, signal in enumerate(source_signals):
            room.add_source(source_locs[:, s], signal=signal)

        # pre-compute the impulse responses
        room.compute_rir()

        # average the t60 of all the RIRs (one per microphone/source pair)
        t60_actual = np.mean(
            [
                pra.experimental.measure_rt60(rir, room.fs)
                for mic_list in room.rir
                for rir in mic_list
            ]
        )

        # save the parameters
        room_params = {
            "shoebox_params": {
                "room_dim": room_dim.tolist(),
                "fs": fs,
                "absorption": 1 - reflection,
                "max_order": max_order,
            },
            "mics": mic_array.R.T.tolist(),
            "sources": {"locs": source_locs.T.tolist(), "signals": wav_files},
            "t60": t60_actual.tolist(),
        }

        return room, room_params
    finally:
        # restore numpy RNG former state, even when something above failed
        if rng_state is not None:
            np.random.set_state(rng_state)

Exemplo n.º 6

0

Exibir arquivo

Arquivo: mbss_sim.py Projeto: cohendrake/blinky-iva

def one_loop(args):
    """
    Simulate one room scenario and score every configured separation algorithm.

    A shoebox room is created with ``n_targets`` target sources,
    ``parameters['n_interferers']`` interferers, a circular array of ``n_mics``
    microphones and ``parameters['n_blinkies']`` additional sensors. The
    sources are mixed at the requested SINR/SNR, and each entry of
    ``parameters['algorithm_kwargs']`` named ``'auxiva'``, ``'blinkiva'`` or
    ``'blinkiva-gauss'`` is run on the mixture. SDR and SIR are computed with
    ``mir_eval.separation.bss_eval_sources``.

    Parameters
    ----------
    args : tuple
        ``(n_targets, n_mics, rt60, sinr, wav_files, seed)`` where

        - ``n_targets`` is the number of target sources,
        - ``n_mics`` is the number of microphones in the circular array,
        - ``rt60`` is a key into ``parameters['rt60_list']``,
        - ``sinr`` is the signal-to-interference-and-noise ratio in dB,
        - ``wav_files`` is forwarded to ``wav_read_center``,
        - ``seed`` seeds the global numpy RNG for this trial.

    Returns
    -------
    list of dict
        One dictionary per key of ``parameters['algorithm_kwargs']`` with the
        keys ``'algorithm'``, ``'n_targets'``, ``'n_mics'``, ``'rt60'``,
        ``'sinr'``, ``'seed'``, ``'sdr'`` and ``'sir'``. The last two are lists
        with one item per evaluation. An empty list is returned when
        ``n_mics < n_targets``. Unknown algorithm names still get an entry, but
        no algorithm is run for them.

    Notes
    -----
    The function reads the module-level ``parameters`` dictionary. The global
    numpy RNG state is saved before seeding and restored on exit, including
    when an exception interrupts the trial.
    """
    n_targets, n_mics, rt60, sinr, wav_files, seed = args

    # This is the underdetermined case. We don't do that. The check comes
    # first so that skipped configurations do not pay for the imports below.
    if n_mics < n_targets:
        return []

    # NOTE(review): the imports are kept at function scope, presumably so the
    # function is self-contained when shipped to worker processes - confirm.
    import numpy
    np = numpy

    import pyroomacoustics
    pra = pyroomacoustics

    import sys
    sys.path.append(parameters['base_dir'])

    from routines import semi_circle_layout, random_layout, gm_layout, grid_layout
    from blinkiva import blinkiva
    from blinkiva_gauss import blinkiva_gauss
    from generate_samples import wav_read_center

    # set MKL to only use one thread if present
    try:
        import mkl
        mkl.set_num_threads(1)
    except ImportError:
        pass

    # set the RNG seed, remembering the former state so it can be restored
    rng_state = np.random.get_state()
    np.random.seed(seed)

    try:
        # STFT parameters
        framesize = parameters['stft_params']['framesize']
        win_a = pra.hann(framesize)
        win_s = pra.transform.compute_synthesis_window(win_a, framesize // 2)

        # Simulation parameters
        fs = parameters['fs']
        snr = parameters['snr']

        n_interferers = parameters['n_interferers']
        n_blinkies = parameters['n_blinkies']
        ref_mic = parameters['ref_mic']
        room_dim = np.array(parameters['room_dim'])

        # all targets have unit variance, except the first one
        sources_var = np.ones(n_targets)
        sources_var[0] = parameters['weak_source_var']

        # total number of sources
        n_sources = n_interferers + n_targets

        # Geometry of the room and location of sources and microphones
        interferer_locs = random_layout(
            [3., 5.5, 1.5], n_interferers, offset=[6.5, 1., 0.5], seed=1
        )

        target_locs = semi_circle_layout(
            [4.1, 3.755, 1.2],
            np.pi / 1.5, 2.,  # 120 degrees arc, 2 meters away
            n_targets,
            rot=0.743 * np.pi,
        )

        source_locs = np.concatenate((target_locs, interferer_locs), axis=1)

        blinky_geometry = parameters['blinky_geometry']
        if blinky_geometry == 'gm':
            # Normally distributed in the vicinity of each source
            blinky_locs = gm_layout(
                n_blinkies, target_locs - np.c_[[0., 0., 0.5]],
                std=[0.4, 0.4, 0.05], seed=987,
            )
        elif blinky_geometry == 'grid':
            # Placed on a regular grid, with a little bit of noise added
            blinky_locs = grid_layout(
                [3., 5.5], n_blinkies, offset=[1., 1., 0.7], seed=987,
            )
        else:
            # default is semi-circular
            blinky_locs = semi_circle_layout(
                [4.1, 3.755, 1.1],
                np.pi, 3.5,
                n_blinkies,
                rot=0.743 * np.pi - np.pi / 4,
                seed=987,
            )

        mic_locs = np.vstack((
            pra.circular_2D_array([4.1, 3.76], n_mics, np.pi / 2, 0.02),
            1.2 * np.ones((1, n_mics)),
        ))
        # microphones first, then the blinkies: later code slices on n_mics
        all_locs = np.concatenate((mic_locs, blinky_locs), axis=1)

        signals = wav_read_center(wav_files, seed=123)

        # Create the room itself
        room = pra.ShoeBox(
            room_dim,
            fs=fs,
            absorption=parameters['rt60_list'][rt60]['absorption'],
            max_order=parameters['rt60_list'][rt60]['max_order'],
        )

        # Place all the sound sources (the last n_sources signals are used)
        for sig, loc in zip(signals[-n_sources:, :], source_locs.T):
            room.add_source(loc, signal=sig)

        assert len(room.sources) == n_sources, (
            "Number of signals ({}) doesn't match number of sources ({})".format(
                signals.shape[0], n_sources
            )
        )

        # Place the microphone array
        room.add_microphone_array(pra.MicrophoneArray(all_locs, fs=room.fs))

        # compute RIRs
        room.compute_rir()

        # Run the simulation
        premix = room.simulate(return_premix=True)

        # Normalize the signals so that they all have unit
        # variance at the reference microphone
        p_mic_ref = np.std(premix[:, ref_mic, :], axis=1)
        premix /= p_mic_ref[:, None, None]

        # scale to pre-defined variance
        premix[:n_targets, :, :] *= np.sqrt(sources_var[:, None, None])

        # compute noise variance
        sigma_n = np.sqrt(10 ** (- snr / 10) * np.sum(sources_var))

        # now compute the power of interference signal needed to achieve the
        # desired SINR; without interferers there is nothing to scale, and
        # skipping avoids a division by zero
        if n_interferers > 0:
            sigma_i = np.sqrt(
                np.maximum(
                    0, 10 ** (- sinr / 10) * np.sum(sources_var) - sigma_n ** 2
                ) / n_interferers
            )
            premix[n_targets:, :, :] *= sigma_i

        # Mix down the recorded signals
        mix = np.sum(premix, axis=0) + sigma_n * np.random.randn(*premix.shape[1:])

        ref = np.moveaxis(premix, 1, 2)

        # START BSS
        ###########

        # pre-emphasis on blinky signals
        if parameters['use_pre_emphasis']:
            mix[n_mics:, :-1] = np.diff(mix[n_mics:, :], axis=1)
            mix[n_mics:, -1] = 0.

        # shape: (n_frames, n_freq, n_mics)
        X_all = pra.transform.analysis(
            mix.T,
            framesize, framesize // 2,
            win=win_a,
        )
        X_mics = X_all[:, :, :n_mics]
        # shape: (n_frames, n_blinkies)
        U_blinky = np.sum(np.abs(X_all[:, :, n_mics:]) ** 2, axis=1)

        # convergence monitoring callback
        def convergence_callback(Y, n_targets, SDR, SIR, ref, framesize, win_s, algo_name):
            from mir_eval.separation import bss_eval_sources
            y = pra.transform.synthesis(Y, framesize, framesize // 2, win=win_s,)

            # algorithms other than blinkiva* get their outputs sorted by
            # decreasing standard deviation before evaluation
            if not algo_name.startswith('blinkiva'):
                new_ord = np.argsort(np.std(y, axis=0))[::-1]
                y = y[:, new_ord]

            m = np.minimum(y.shape[0] - framesize // 2, ref.shape[1])
            sdr, sir, sar, perm = bss_eval_sources(
                ref[:n_targets, :m, 0],
                y[framesize // 2:m + framesize // 2, :n_targets].T,
            )
            SDR.append(sdr.tolist())
            SIR.append(sir.tolist())

        # store results in a list, one entry per algorithm
        results = []

        for name, kwargs in parameters['algorithm_kwargs'].items():

            results.append({
                'algorithm': name,
                'n_targets': n_targets,
                'n_mics': n_mics,
                'rt60': rt60,
                'sinr': sinr,
                'seed': seed,
                'sdr': [], 'sir': [],  # to store the result
            })

            if parameters['monitor_convergence']:
                # The closure reads `name` and `results[-1]` late, which is
                # fine because it is only invoked during this iteration.
                cb = lambda Y: convergence_callback(
                    Y, n_targets,
                    results[-1]['sdr'], results[-1]['sir'],
                    ref, framesize, win_s, name,
                )
            else:
                cb = None
                # In that case, we still want to capture the initial values of SDR/SIR
                convergence_callback(
                    X_mics, n_targets,
                    results[-1]['sdr'], results[-1]['sir'],
                    ref, framesize, win_s, name,
                )

            if name == 'auxiva':
                # Run AuxIVA
                Y = pra.bss.auxiva(
                    X_mics,
                    callback=cb,
                    **kwargs,
                )

            elif name == 'blinkiva':
                # Run BlinkIVA
                Y = blinkiva(
                    X_mics, U_blinky, n_src=n_targets,
                    callback=cb,
                    **kwargs,
                )

            elif name == 'blinkiva-gauss':
                # Run BlinkIVA with the Gaussian variant
                Y = blinkiva_gauss(
                    X_mics, U_blinky, n_src=n_targets,
                    callback=cb,
                    **kwargs,
                )

            else:
                # unknown algorithm: keep the entry but run nothing
                continue

            # The last evaluation
            convergence_callback(
                Y, n_targets,
                results[-1]['sdr'], results[-1]['sir'],
                ref, framesize, win_s, name,
            )

        return results

    finally:
        # restore RNG former state, even if the trial failed
        np.random.set_state(rng_state)

Exemplo n.º 7

0

Exibir arquivo

    def line_createroom(Bird1, Bird2, Bird3, callback_mix):
        """
        Simulate three sources and four microphones in a room, then separate them.

        Microphone and source positions both come from ``semi_line_layout``.
        The room is a ``pra.ShoeBox``; the mixture is produced by
        ``room.simulate`` with the supplied ``callback_mix``. The algorithm
        selected by the module-level ``algo_choices[0]`` is applied to the
        STFT of the microphone signals and evaluated with ``bss_eval_sources``.

        Parameters
        ----------
        Bird1, Bird2, Bird3
            Passed as a list to ``wav_read_center``
            (presumably paths of wav files - verify against caller).
        callback_mix
            Forwarded to ``room.simulate`` as ``callback_mix``.

        Returns
        -------
        tuple
            ``(sdr, sir, mixdata, separationdata)``. ``sdr`` and ``sir`` are
            the values returned by ``bss_eval_sources``. ``mixdata`` is the
            16-bit normalised microphone signals, transposed.
            ``separationdata`` is a list of 16-bit normalised separated
            signals, ordered by the permutation from ``bss_eval_sources``.

        Raises
        ------
        ValueError
            If the selected algorithm name is not one of the handled choices.

        Notes
        -----
        Seeds the global numpy RNG with 10 and does not restore it. Relies on
        names defined outside this function (``algo_choices``, ``init``,
        ``init_choices``, the separation routines, ``time``, ``pra``, ``np``).
        """
        # ---- room description ----
        roomdim = np.array([20, 20, 10])
        max_order = 17
        absorption = 0.9

        # ---- layout arguments handed to semi_line_layout ----
        mic_p = [13, 10, 3.5]  # microphone centre point
        mic_d = 0.015  # microphone distance
        sour_p = [7, 10, 6]  # source position
        sour_d = 5  # source distance

        n_mics = 4
        n_sources = 3
        n_sources_target = 3

        mic_rot = np.pi / 2
        bird_rot = np.pi / 2

        # ---- parameter setting ----
        np.random.seed(10)

        # STFT parameters: half-overlapping frames
        framesize = 4096
        hop = framesize // 2
        analysis_win = pra.hann(framesize)
        synthesis_win = pra.transform.compute_synthesis_window(analysis_win, hop)

        # OGIVE-specific settings
        ogive_mu = 0.1
        ogive_update = "switching"
        ogive_iter = 2000

        SIR = 10  # dB
        SNR = 60
        algo = algo_choices[0]
        n_iter = 60
        dist = "gauss"  # laplace
        fs = 44100
        assert n_sources_target <= n_mics, "More sources than microphones is not supported"

        source_std = np.ones(n_sources_target)

        # microphone positions
        mic_locs = semi_line_layout(mic_p, mic_rot, mic_d, n_mics)

        # source positions
        source_locs = semi_line_layout(sour_p, bird_rot, sour_d, n_sources)

        # manual offsets (currently zero) on row 0, columns 0 and 2
        pushed_first = source_locs[0][0] + 0
        pushed_third = source_locs[0][2] + 0
        source_locs[0][0], source_locs[0][2] = pushed_first, pushed_third

        # load the audio
        signals = wav_read_center([Bird1, Bird2, Bird3], seed=123)

        # build the room, then populate it with sources and microphones
        room = pra.ShoeBox(
            roomdim,
            fs=fs,
            absorption=absorption,
            max_order=max_order,
            air_absorption=True,
            humidity=50,
        )
        for position, waveform in zip(source_locs.T, signals):
            room.add_source(position, signal=waveform)
        room.add_microphone_array(pra.MicrophoneArray(mic_locs, fs=room.fs))

        # Run the simulation
        separate_recordings = room.simulate(
            callback_mix=callback_mix,
            callback_mix_kwargs={
                "snr": SNR,
                "sir": SIR,
                "n_src": n_sources,
                "n_tgt": n_sources_target,
                "src_std": source_std,
                "ref_mic": 0,
            },
            return_premix=True,
        )
        mics_signals = room.mic_array.signals
        print("line Simulation done.")

        # Reference signals; pad with noise along axis 0 up to n_mics entries
        ref = np.moveaxis(separate_recordings, 1, 2)
        n_missing = n_mics - ref.shape[0]
        if n_missing > 0:
            filler = np.random.randn(n_missing, ref.shape[1], ref.shape[2])
            ref = np.concatenate((ref, filler), axis=0)

        # no convergence monitoring in this example
        convergence_callback = None

        X_all = pra.transform.analysis(mics_signals.T, framesize, hop, win=analysis_win)
        X_all = X_all.astype(np.complex128)
        X_mics = X_all[:, :, :n_mics]

        tic = time.perf_counter()

        # keyword arguments shared by the different separation routines
        iva_kwargs = {
            "n_iter": n_iter,
            "proj_back": True,
            "callback": convergence_callback,
        }
        ogive_kwargs = {
            "n_iter": ogive_iter,
            "step_size": ogive_mu,
            "update": ogive_update,
            "proj_back": True,
            "callback": convergence_callback,
        }

        if algo == "auxiva":
            # "auxiva" is run through overiva without a source count
            Y = overiva(X_mics, model=dist, **iva_kwargs)
        elif algo == "auxiva_pca":
            Y = auxiva_pca(X_mics, n_src=n_sources_target, model=dist, **iva_kwargs)
        elif algo == "overiva":
            Y = overiva(
                X_mics,
                n_src=n_sources_target,
                model=dist,
                init_eig=(init == init_choices[1]),
                **iva_kwargs,
            )
        elif algo == "ilrma":
            Y = pra.bss.ilrma(X_mics, n_components=2, **iva_kwargs)
        elif algo == "ogive":
            Y = ogive(
                X_mics,
                model=dist,
                init_eig=(init == init_choices[1]),
                **ogive_kwargs,
            )
        elif algo == "ogive_matlab":
            Y = ogive_matlab_wrapper(
                X_mics,
                init_eig=(init == init_choices[1]),
                **ogive_kwargs,
            )
        else:
            raise ValueError("No such algorithm {}".format(algo))

        # Run iSTFT; a single-output result is kept two-dimensional
        if Y.shape[2] == 1:
            y = pra.transform.synthesis(Y[:, :, 0], framesize, hop, win=synthesis_win)
            y = y[:, None]
        else:
            y = pra.transform.synthesis(Y, framesize, hop, win=synthesis_win)
        y = y.astype(np.float64)

        # If some of the outputs are uniformly zero, add a bit of noise to compare
        for k, channel in enumerate(y.T):
            if np.sum(np.abs(channel)) < 1e-10:
                y[:, k] = np.random.randn(y.shape[0]) * 1e-10

        # For conventional methods of BSS, reorder the signals by decreasing power
        if algo != "blinkiva":
            by_power = np.argsort(np.std(y, axis=0))[::-1]
            y = y[:, by_power]

        # Compare against the references, skipping the first half frame of y
        m = np.minimum(y.shape[0] - hop, ref.shape[1])
        sdr, sir, sar, perm = bss_eval_sources(
            ref[:n_sources_target, :m, 0],
            y[hop:m + hop, :n_sources_target].T,
        )

        # reorder the vector of reconstructed signals
        y_hat = y[:, perm]

        # 16-bit versions of the mixture and of every separated signal
        mixdata = pra.normalize(mics_signals, bits=16).astype(np.int16).T
        separationdata = [
            pra.normalize(sig, bits=16).astype(np.int16).T for sig in y_hat.T
        ]
        print("sdr", sdr)
        return sdr, sir, mixdata, separationdata