Exemplo n.º 1
0
def test_virtual_mic():
    """Audition VirtualStereoMic on a stationary and on a moving source.

    Renders ``wav_test/piano.wav`` once at the first position listed in
    ``wav_test/piano_stat_position.txt`` and once along the positions listed
    in ``wav_test/piano_mov_position.txt``. Each rendering is written to
    ``/tmp/output.wav``, played with the ``play`` command and then deleted.
    Nothing is asserted; the result is judged by ear.
    """
    from pyutils.iolib.audio import load_wav, save_wav

    def read_positions(fn):
        # Read inside a ``with`` block so the handle is closed immediately
        # (a bare ``open`` inside a comprehension leaks it), and skip blank
        # lines, which hold no coordinates to index.
        with open(fn, 'r') as f:
            rows = [[float(num) for num in l.strip().split()]
                    for l in f if l.strip()]
        return [Position(p[0], p[1], p[2], 'polar') for p in rows]

    def play(stereo, rate):
        # Write, play through the external ``play`` tool, then clean up.
        save_wav('/tmp/output.wav', stereo, rate)
        os.system('play /tmp/output.wav')
        os.remove('/tmp/output.wav')

    mic = VirtualStereoMic()
    mono, rate = load_wav('wav_test/piano.wav')
    mono = mono[:, 0]  # keep only the first channel

    # Stationary source: only the first listed position is used.
    positions = read_positions('wav_test/piano_stat_position.txt')
    source = PositionalSource(mono, positions[0], rate)
    play(mic.binauralize([source]), rate)

    # Moving source: rendered frame by frame into a preallocated buffer.
    positions = read_positions('wav_test/piano_mov_position.txt')
    source = MovingSource(mono, positions, rate)

    stereo = np.zeros((mono.shape[0], 2))
    while source.tic():
        mic.binauralize_frame([source], stereo, source.cur_idx)
    play(stereo, rate)
Exemplo n.º 2
0
def run(input_fn, position_fn, ambi_order, output_fn):
    """Encode a mono recording into ambisonics and binauralize it to stereo.

    Args:
        input_fn: Path of the input wav file. If it has more than one
            channel, a warning is issued and only the first is used.
        position_fn: Text file with one position per line, given as three
            whitespace-separated numbers that are passed to
            ``Position(..., 'polar')``. A single position yields a
            stationary source; several yield a moving source.
        ambi_order: Ambisonics order passed to ``AmbiFormat``.
        output_fn: Path the stereo result is written to.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        warnings.warn(
            'Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    fmt = AmbiFormat(ambi_order=ambi_order, sample_rate=rate)
    encoder = AmbiEncoder(fmt)

    # Read through a context manager so the handle is closed (the bare
    # ``open`` inside the comprehension leaked it). Blank lines are skipped:
    # they carry no coordinates and would fail on ``p[0]`` below.
    with open(position_fn, 'r') as f:
        positions = [
            np.array([float(num) for num in l.strip().split()])
            for l in f if l.strip()
        ]
    positions = [Position(p[0], p[1], p[2], 'polar') for p in positions]

    if len(positions) == 1:
        # Stationary source
        source = PositionalSource(mono, positions[0], rate)
        ambi = encoder.encode(source)

    else:
        # Moving source: encode frame by frame into a preallocated array.
        source = MovingSource(mono, positions, rate)
        ambi = AmbisonicArray(np.zeros((mono.shape[0], fmt.num_channels)), fmt)
        while source.tic():
            encoder.encode_frame(source, ambi, source.cur_idx)

    binauralizer = DirectAmbisonicBinauralizer(fmt, method='projection')
    # binauralizer = AmbisonicBinauralizer(fmt, method='projection', use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    stereo = binauralizer.binauralize(ambi.data)
    save_wav(output_fn, stereo, rate)
Exemplo n.º 3
0
def test_source_binauralizer():
    """Audition SourceBinauralizer on a static and a moving synthetic sample.

    For each sample the position file is read, the first channel of the first
    listed recording is binauralized, and the peak-normalised result is
    written to ``/tmp/output.wav``, played with ``play`` and deleted.
    Nothing is asserted; the result is judged by ear.
    """
    from pyutils.iolib.audio import load_wav, save_wav
    from pyutils.iolib.position import read_position_file

    def audition(signal, sample_rate):
        # Scale to unit peak, write, play, and clean up.
        peak = np.abs(signal).max()
        save_wav('/tmp/output.wav', signal / peak, sample_rate)
        os.system('play /tmp/output.wav')
        os.remove('/tmp/output.wav')

    # renderer = SourceBinauralizer(use_hrtfs=True, cipic_dir='hrtfs/cipic_subj3')
    renderer = SourceBinauralizer(use_hrtfs=False)

    # --- Static source: rendered in one call from its first position ---
    sample = 'wav_test/gen_synthetic-S1'
    pos_by_id, wav_by_id, _, ids = read_position_file(sample+'-position.txt')
    first_id = ids[0]
    wave, sr = load_wav(wav_by_id[first_id])
    static_src = PositionalSource(wave[:, 0], pos_by_id[first_id][0], sr)
    audition(renderer.binauralize([static_src]), sr)

    # --- Moving source: rendered frame by frame into a zeroed buffer ---
    sample = 'wav_test/gen_synthetic-M1'
    pos_by_id, wav_by_id, _, ids = read_position_file(sample+'-position.txt')
    first_id = ids[0]
    wave, sr = load_wav(wav_by_id[first_id])
    moving_src = MovingSource(wave[:, 0], pos_by_id[first_id], sr)
    out = np.zeros((wave.shape[0], 2))
    while moving_src.tic():
        renderer.binauralize_frame([moving_src], out, moving_src.cur_idx)
    audition(out, sr)
Exemplo n.º 4
0
def run(input_fn, x, y, z, ambi_order, output_fn):
    """Encode a mono recording as an ambisonics source at a fixed position.

    Args:
        input_fn: Path of the input wav file. If it has more than one
            channel, a warning is issued and only the first is used.
        x, y, z: Coordinates passed to ``Position(..., 'cartesian')``.
        ambi_order: Ambisonics order passed to ``AmbiFormat``.
        output_fn: Path the encoded ambisonics channels are written to.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        # Message fixed: it previously read "is nor a mono source".
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    encoder = AmbiEncoder(AmbiFormat(ambi_order=ambi_order, sample_rate=rate))
    source = PositionalSource(mono, Position(x, y, z, 'cartesian'), rate)
    ambi = encoder.encode(source)
    save_wav(output_fn, ambi.data, rate)
Exemplo n.º 5
0
def main(args):
    """Predict ambisonics for a clip and export it as audio and/or 360 video.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``gpu``, ``model_dir``, ``input_folder``, ``deploy_start``,
            ``deploy_duration``, ``save_ambix``, ``save_video``, ``video``,
            ``output_fn``, ``overlay_map`` and ``VR``.

    The predicted ambisonics are first written to a temporary wav file.
    With ``save_ambix`` that file is converted to ``args.output_fn`` by
    ffmpeg. With ``save_video`` the requested segment of ``args.video`` is
    cut out and combined with the audio by ``myutils.gen_360video``, which
    also writes to ``args.output_fn``.
    """
    import subprocess

    os.environ["CUDA_VISIBLE_DEVICES"] = "%d" % args.gpu

    # mkstemp creates the files atomically; the deprecated mktemp only
    # returned a name and was open to a race with other processes.
    tmp_files = []
    try:
        fd, tmp_ambix_fn = tempfile.mkstemp(dir='/tmp', suffix='.wav')
        os.close(fd)
        tmp_files.append(tmp_ambix_fn)
        fd, tmp_video_fn = tempfile.mkstemp(dir='/tmp', suffix='.mp4')
        os.close(fd)
        tmp_files.append(tmp_video_fn)

        model = W2XYZ(args.model_dir)
        ambi_pred = model.deploy(args.input_folder, args.deploy_start,
                                 args.deploy_duration)

        # NOTE: disabled alternative that blends two half-window-shifted
        # predictions with Hann weights; kept for reference.
        # dur_t = model.model.duration
        # snd1 = model.deploy(args.input_folder, args.deploy_start - dur_t/2, args.deploy_duration + dur_t)
        # hann1 = np.hanning(model.model.snd_dur)
        # hann1 = np.tile(hann1, snd1.shape[0]/hann1.size)[:, np.newaxis]
        # ss = model.model.snd_dur/2
        # t = int(args.deploy_duration * model.params.audio_rate)
        # snd1 = snd1[ss:ss+t]
        # hann1 = hann1[ss:ss+t]

        # snd2 = model.deploy(args.input_folder, args.deploy_start, args.deploy_duration + dur_t)
        # hann2 = np.hanning(model.model.snd_dur)
        # hann2 = np.tile(hann2, snd2.shape[0]/hann2.size)[:, np.newaxis]
        # ss = 0
        # t = int(args.deploy_duration * model.params.audio_rate)
        # snd2 = snd2[ss:ss+t]
        # hann2 = hann2[ss:ss+t]

        # ambi_pred = (snd1 * hann1 + snd2 * hann2) / (hann1 + hann2)

        # Save ambisonics
        save_wav(tmp_ambix_fn, ambi_pred, model.params.audio_rate)

        if args.save_ambix:
            print('Saving ambisonics wav...')
            # Argument list instead of a shell string: paths containing
            # spaces or shell metacharacters are passed through intact.
            subprocess.call(['ffmpeg', '-y', '-i', tmp_ambix_fn,
                             '-strict', '-2', args.output_fn])

        if args.save_video:
            print('Saving video...')
            subprocess.call(['ffmpeg', '-y',
                             '-ss', str(args.deploy_start),
                             '-i', args.video,
                             '-t', str(args.deploy_duration),
                             tmp_video_fn])

            myutils.gen_360video(tmp_ambix_fn,
                                 tmp_video_fn,
                                 args.output_fn,
                                 overlay_map=args.overlay_map,
                                 inject_meta=args.VR,
                                 binauralize=not args.VR)
    finally:
        # Remove the temporaries even when prediction or export fails.
        for fn in tmp_files:
            if os.path.exists(fn):
                os.remove(fn)
def run(input_fn, x, y, z, output_fn, use_hrtfs, hrtf_dir):
    """Binauralize a mono recording placed at a fixed cartesian position.

    Args:
        input_fn: Path of the input wav file. If it has more than one
            channel, a warning is issued and only the first is used.
        x, y, z: Coordinates passed to ``Position(..., 'cartesian')``.
        output_fn: Path the stereo result is written to.
        use_hrtfs: Forwarded to ``SourceBinauralizer(use_hrtfs=...)``.
        hrtf_dir: Forwarded to ``SourceBinauralizer(cipic_dir=...)``.
    """

    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        # Message fixed: it previously read "is nor a mono source".
        warnings.warn(
            'Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    binauralizer = SourceBinauralizer(use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    source = PositionalSource(mono, Position(x, y, z, 'cartesian'), rate)
    # NOTE(review): some callers pass a list (``binauralize([source])``);
    # confirm which form the installed SourceBinauralizer expects.
    stereo = binauralizer.binauralize(source)
    save_wav(output_fn, stereo, rate)
Exemplo n.º 7
0
def run(input_fn, output_fn, overwrite=False):
    """Decode an ambisonics wav file to stereo.

    Args:
        input_fn: Path of the ambisonics wav file. The ambisonics order is
            derived from its channel count as ``int(sqrt(channels) - 1)``.
        output_fn: Path the stereo result is written to.
        overwrite: When true, an existing ``output_fn`` is deleted first.

    Raises:
        AssertionError: If ``output_fn`` exists and ``overwrite`` is false.
    """
    if os.path.exists(output_fn):
        if not overwrite:
            # Raised explicitly rather than through ``assert`` so the guard
            # against clobbering an existing file survives ``python -O``.
            raise AssertionError(
                'Output file already exists: {}'.format(output_fn))
        os.remove(output_fn)

    data, rate = load_wav(input_fn)
    ambi_order = int(np.sqrt(data.shape[1]) - 1)

    fmt = AmbiFormat(ambi_order=ambi_order, sample_rate=rate)
    binauralizer = DirectAmbisonicBinauralizer(fmt, method='pseudoinv')
    # binauralizer = AmbisonicBinauralizer(fmt, method='projection', use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    stereo = binauralizer.binauralize(data)
    save_wav(output_fn, stereo, rate)
def run(input_fn, x, y, z, ambi_order, output_fn):
    """Encode a mono recording at a fixed position and binauralize it.

    Args:
        input_fn: Path of the input wav file. If it has more than one
            channel, a warning is issued and only the first is used.
        x, y, z: Coordinates passed to ``Position(..., 'cartesian')``.
        ambi_order: Ambisonics order passed to ``AmbiFormat``.
        output_fn: Path the stereo result is written to.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        # Message fixed: it previously read "is nor a mono source".
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    encoder = AmbiEncoder(AmbiFormat(ambi_order=ambi_order, sample_rate=rate))
    source = PositionalSource(mono, Position(x, y, z, 'cartesian'), rate)
    ambi = encoder.encode(source)

    # Decode with the same format the encoder produced.
    binauralizer = DirectAmbisonicBinauralizer(ambi.format, method='projection')
    # binauralizer = AmbisonicBinauralizer(ambi.format, method='projection', use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)
    stereo = binauralizer.binauralize(ambi.data)
    save_wav(output_fn, stereo, rate)
Exemplo n.º 9
0
def extract_frames(audio_fn, video_fn, frames_dir, yid, overwrite):
    """Split a recording into one-second wav chunks and individual jpg frames.

    Args:
        audio_fn: Path of the audio file to split.
        video_fn: Path of the video file to split.
        frames_dir: Output root; chunks go to ``<frames_dir>/ambix`` and
            frames to ``<frames_dir>/video``. Created when missing.
        yid: Identifier used only in the progress messages.
        overwrite: When true, existing ``ambix``/``video`` sub-directories
            are deleted and recreated; otherwise they are left in place.

    The number of whole seconds processed is the shorter of the two
    durations reported by the readers.
    """
    rule = '='*30
    print('\n'+rule+' '+yid+' '+rule)

    # Prepare directory tree
    if not os.path.isdir(frames_dir):
        os.makedirs(frames_dir)

    out_dirs = {}
    for sub in ('ambix', 'video'):
        target = os.path.join(frames_dir, sub)
        present = os.path.isdir(target)
        if present and overwrite:
            shutil.rmtree(target)
        if overwrite or not present:
            # Either freshly wiped or never there: (re)create it.
            os.makedirs(target)
        out_dirs[sub] = target
    audio_dir = out_dirs['ambix']
    video_dir = out_dirs['video']

    # Open readers (used only to find the common duration)
    audio_reader = AudioReader(audio_fn)
    video_reader = VideoReader(video_fn)
    shortest = min(audio_reader.duration, video_reader.duration)
    duration_secs = int(shortest)

    # Ambisonics: one chunk of ``rate`` samples per second
    print('({}) Splitting ambisonics into chunks'.format(yid))
    sys.stdout.flush()
    snd = AudioReader(audio_fn, rate=48000)
    for sec in range(duration_secs):
        samples = snd.get_chunk(snd.rate)
        save_wav(os.path.join(audio_dir, '{:06d}.wav'.format(sec)),
                 samples, snd.rate)

    # Video: one jpg per frame
    print('({}) Splitting video into frames'.format(yid))
    sys.stdout.flush()
    vid = VideoReader(video_fn)
    total_frames = int(vid.fps * duration_secs)
    for idx in range(total_frames):
        target_fn = os.path.join(video_dir, '{:06d}.jpg'.format(idx))
        picture = vid.get()
        sio.imsave(target_fn, picture)
Exemplo n.º 10
0
def run(position_fn, ambi_order, output_fn, rate=24000, base_dir=None, randomize=False, overwrite=False):
    """Mix positioned mono samples into one ambisonics wav file.

    Args:
        position_fn: Position file parsed by ``read_position_file``.
        ambi_order: Ambisonics order passed to ``AmbiFormat``.
        output_fn: Path the ambisonics result is written to.
        rate: Sample rate every input is loaded at, and the output rate.
        base_dir: Directory the sample paths are relative to; defaults to
            ``ESC_BASE``.
        randomize: Unused in this function.
        overwrite: When true, an existing ``output_fn`` is deleted first.

    Raises:
        AssertionError: If ``output_fn`` exists and ``overwrite`` is false.

    Samples with at least one position are encoded as moving sources;
    samples with no positions are treated as ambient sound and added to
    channel 0 only. The mix is then scaled to a peak magnitude of 0.95.
    """
    if os.path.exists(output_fn):
        if not overwrite:
            # Raised explicitly rather than through ``assert`` so the guard
            # against clobbering an existing file survives ``python -O``.
            raise AssertionError(
                'Output file already exists: {}'.format(output_fn))
        os.remove(output_fn)

    if base_dir is None:
        base_dir = ESC_BASE

    sample_ids, positions, input_fn, _, _ = read_position_file(position_fn)
    # NOTE(review): ``bkg`` is rescaled below but neither it nor ``source``
    # is used afterwards, so this currently has no effect on the output.
    # Presumably the ambient sample was meant to be attenuated relative to
    # the source -- confirm intent before removing or wiring it in.
    source, _ = librosa.load(os.path.join(base_dir, input_fn['source']), sr=rate)
    bkg, _ = librosa.load(os.path.join(base_dir, input_fn['ambient']), sr=rate)
    Psrc = np.convolve(source ** 2, np.ones((int(rate * 0.1),)) / (rate * 0.1)).max()
    Pbkg = np.convolve(bkg ** 2, np.ones((int(rate * 0.1),)) / (rate * 0.1)).max()
    bkg *= 0.1*Psrc/Pbkg

    data = {}
    for smp_id in sample_ids:
        fn = os.path.join(base_dir, input_fn[smp_id])
        mono, _ = librosa.load(fn, sr=rate)
        if mono.ndim == 2:
            mono = mono[:, 0]
        data[smp_id] = mono

    fmt = AmbiFormat(ambi_order=ambi_order, sample_rate=rate)
    encoder = AmbiEncoder(fmt)

    sources = [MovingSource(data[smp_id], positions[smp_id], rate)
               for smp_id in sample_ids if len(positions[smp_id])]
    nframes = max([v.shape[0] for v in data.values()])
    ambix = AmbisonicArray(np.zeros((nframes, fmt.num_channels)), fmt)
    t = -1
    # ``all([])`` is True, so without the ``sources`` check an ambient-only
    # scene would loop until the encoder failed. The list (not a generator)
    # is deliberate: every source must be advanced on each step, even after
    # one of them has already reported that it is finished.
    while sources and all([src.tic() for src in sources]):
        t += 1
        encoder.encode_frame(sources, ambix, t)

    ambix = ambix.data
    for smp_id in sample_ids:
        if len(positions[smp_id]) == 0: # Ambient sound
            ambix[:data[smp_id].size, 0] += data[smp_id]

    # Normalise by the absolute peak: the plain maximum ignores negative
    # excursions (which could then clip) and divides by zero on silence.
    peak = np.abs(ambix).max()
    if peak > 0:
        ambix = ambix / peak * 0.95

    save_wav(output_fn, ambix, rate)
def run(input_fn, position_fn, output_fn, use_hrtfs, hrtf_dir):
    """Binauralize a mono recording along positions read from a file.

    Args:
        input_fn: Path of the input wav file. If it has more than one
            channel, a warning is issued and only the first is used.
        position_fn: Text file with one position per line, given as three
            whitespace-separated numbers that are passed to
            ``Position(..., 'polar')``. A single position yields a
            stationary source; several yield a moving source.
        output_fn: Path the stereo result is written to.
        use_hrtfs: Forwarded to ``SourceBinauralizer(use_hrtfs=...)``.
        hrtf_dir: Forwarded to ``SourceBinauralizer(cipic_dir=...)``.
    """
    mono, rate = load_wav(input_fn)
    if mono.ndim == 2 and mono.shape[1] > 1:
        warnings.warn('Input waveform is not a mono source. Using only first channel.')
        mono = mono[:, 0]

    # Read through a context manager so the handle is closed (the bare
    # ``open`` inside the comprehension leaked it). Blank lines are skipped:
    # they carry no coordinates and would fail on ``p[0]`` below.
    with open(position_fn, 'r') as f:
        positions = [[float(num) for num in l.strip().split()]
                     for l in f if l.strip()]
    positions = [Position(p[0], p[1], p[2], 'polar') for p in positions]
    binauralizer = SourceBinauralizer(use_hrtfs=use_hrtfs, cipic_dir=hrtf_dir)

    # NOTE(review): some callers pass a list of sources to ``binauralize`` /
    # ``binauralize_frame``; confirm which form the installed
    # SourceBinauralizer expects.
    if len(positions) == 1:     # Stationary source
        source = PositionalSource(mono, positions[0], rate)
        stereo = binauralizer.binauralize(source)

    else:
        # Moving source: rendered frame by frame into a preallocated buffer.
        source = MovingSource(mono, positions, rate)
        stereo = np.zeros((mono.shape[0], 2))
        while source.tic():
            binauralizer.binauralize_frame(source, stereo, source.cur_idx)

    save_wav(output_fn, stereo, rate)
Exemplo n.º 12
0
def test_ambisonics_binauralizer():
    """Audition DirectAmbisonicBinauralizer on two synthetic ambisonics files.

    The binauralizer is built once, using the sample rate of the first file,
    and reused for the second. Each peak-normalised result is written to
    ``/tmp/output.wav``, played with ``play`` and deleted. Nothing is
    asserted; the result is judged by ear.
    """
    from pyutils.iolib.audio import load_wav, save_wav
    from pyutils.ambisonics.common import AmbiFormat

    decoder = None
    for sample in ('wav_test/gen_synthetic-S1', 'wav_test/gen_synthetic-M1'):
        field, sr = load_wav(sample+'-ambix.wav')

        if decoder is None:
            # First pass only: the format takes its rate from the first file.
            decoder = DirectAmbisonicBinauralizer(AmbiFormat(1, sr),
                                                  method='pseudoinv')
            # decoder = AmbisonicBinauralizer(fmt, method='projection', use_hrtfs=True, cipic_dir='hrtfs/cipic_subj3')

        out = decoder.binauralize(field)
        peak = np.abs(out).max()

        save_wav('/tmp/output.wav', out / peak, sr)
        os.system('play /tmp/output.wav')
        os.remove('/tmp/output.wav')
Exemplo n.º 13
0
def gen_360video(audio_fn,
                 video_fn,
                 output_fn,
                 inject_meta=False,
                 overlay_map=False,
                 binauralize=False,
                 no_spatial_audio=False):
    """Combine an audio track and a video track into one output video.

    Args:
        audio_fn: File whose audio stream is used (video is dropped).
        video_fn: File whose video stream is used (audio is dropped).
        output_fn: Destination path; resolved against the current working
            directory before use.
        inject_meta: When true, the mixed file is passed through the
            ``spatialmedia`` tool under ``3rd-party/spatial-media`` next to
            this module; otherwise the mixed file is moved to ``output_fn``.
        overlay_map: When true, a colour map computed from the audio by
            ``SphericalAmbisonicsVisualizer`` is alpha-blended over every
            video frame.
        binauralize: When true, the audio is replaced by a two-channel mix
            built from the sum and difference of its first two channels,
            scaled to a peak magnitude of 0.95.
        no_spatial_audio: When true, ``--spatial-audio`` is omitted from the
            ``spatialmedia`` command line.

    All intermediate files live in ``/tmp`` and are removed at the end.
    """
    from pyutils.iolib.video import VideoReader, VideoWriter
    from pyutils.iolib.audio import load_wav, save_wav
    from pyutils.ambisonics.distance import SphericalAmbisonicsVisualizer
    import tempfile
    from matplotlib import pyplot as plt
    from skimage.transform import resize

    # NOTE(review): mktemp only reserves a name and is deprecated. It cannot
    # simply be swapped for mkstemp here, because the two ffmpeg "split"
    # commands below run without -y and would stop on an existing file.
    tmp_file = tempfile.mktemp(dir='/tmp/', suffix='.mp4')
    tmp_snd_file = tempfile.mktemp(dir='/tmp/', suffix='.wav')
    tmp_vid_file = tempfile.mktemp(dir='/tmp/', suffix='.mp4')

    # Demux: audio-only copy of audio_fn, video-only copy of video_fn.
    print('Splitting')
    cmd = 'ffmpeg -i {} -vn -strict -2 {}'.format(audio_fn, tmp_snd_file)
    print(cmd)
    os.system(cmd)

    cmd = 'ffmpeg -i {} -an -vcodec copy {}'.format(video_fn, tmp_vid_file)
    print(cmd)
    os.system(cmd)

    if overlay_map:
        print('Overlaying spherical map')
        tmp_vid_file2 = tempfile.mktemp(dir='/tmp/', suffix='.mp4')
        ambix, snd_rate = load_wav(tmp_snd_file)
        reader = VideoReader(tmp_vid_file, rate=10)
        writer = VideoWriter(tmp_vid_file2, reader.fps)
        # The audio is decimated by 5 (every fifth sample, rate / 5). One
        # visualizer frame is consumed per 5 video frames below.
        ambiVis = SphericalAmbisonicsVisualizer(ambix[::5], snd_rate / 5.,
                                                5. / reader.fps, 5.)
        # 256-entry RGB lookup table (alpha column dropped).
        cmap = plt.cm.YlOrRd(np.linspace(0, 1, 256))[:, :3]

        # Min-max normalise each map; 0.005 keeps the denominator non-zero.
        cur_rms = ambiVis.get_next_frame()
        cur_rms = (cur_rms - cur_rms.min()) / (cur_rms.max() - cur_rms.min() +
                                               0.005)
        while True:
            prev_rms = cur_rms
            cur_rms = ambiVis.get_next_frame()
            if cur_rms is None:
                break
            cur_rms = (cur_rms - cur_rms.min()) / (cur_rms.max() -
                                                   cur_rms.min() + 0.005)

            for i in range(5):
                frame = reader.get()
                if frame is None:
                    # Only leaves this inner loop; the outer loop ends when
                    # the visualizer runs out of frames.
                    break

                # Linear interpolation between consecutive maps.
                beta = i / 5.
                rms = (1 - beta) * prev_rms + beta * cur_rms
                # Stretch contrast and suppress low values.
                rms = rms * 2. - 0.7
                rms[rms < 0] = 0
                # Map to colour indices, clamped to the table size.
                dir_map = (rms * 255).astype(int)
                dir_map[dir_map > 255] = 255
                dir_map = resize(cmap[dir_map], reader.frame_shape[:2]) * 255

                # Per-pixel opacity follows the map, capped at 0.6.
                alpha = resize(rms[:, :, np.newaxis],
                               reader.frame_shape[:2]) * 0.6
                overlay = alpha * dir_map + (1 - alpha) * frame
                writer.write_frame(overlay.astype(np.uint8))

        # Drop the reader/writer before deleting the file they refer to.
        del writer, reader
        os.remove(tmp_vid_file)
        tmp_vid_file = tmp_vid_file2

    if binauralize:
        print('Binauralizing')
        tmp_snd_file2 = tempfile.mktemp(dir='/tmp/', suffix='.wav')
        ambix, snd_rate = load_wav(tmp_snd_file)
        # Two output channels: channel 0 plus / minus channel 1.
        stereo = np.stack(
            [ambix[:, 0] + ambix[:, 1], ambix[:, 0] - ambix[:, 1]], 1)
        stereo /= (np.abs(stereo).max() / 0.95)
        save_wav(tmp_snd_file2, stereo, snd_rate)

        os.remove(tmp_snd_file)
        tmp_snd_file = tmp_snd_file2

    print('Mixing')
    cmd = 'ffmpeg -y -i {} -i {} -vcodec copy -strict -2 {}'.format(
        tmp_snd_file, tmp_vid_file, tmp_file)
    print(cmd)
    os.system(cmd)

    # Make the destination absolute: the metadata injector below is run from
    # a different working directory.
    cwd = os.getcwd()
    output_fn = os.path.join(cwd, output_fn)

    if inject_meta:
        print('Injecting metadata')
        file_dir = os.path.dirname(os.path.realpath(__file__))
        spt_media_dir = os.path.realpath(
            os.path.join(file_dir, '3rd-party', 'spatial-media'))
        os.chdir(spt_media_dir)
        try:
            os.system('python spatialmedia -i --stereo=none {} {} {} '.format(
                '' if no_spatial_audio else '--spatial-audio', tmp_file,
                output_fn))
        finally:
            # Always restore the caller's working directory, even if the
            # injector call is interrupted.
            os.chdir(cwd)
        os.remove(tmp_file)

    else:
        import shutil
        shutil.move(tmp_file, output_fn)

    os.remove(tmp_snd_file)
    os.remove(tmp_vid_file)