def collect_wavs(filenames, dest_fs=None):
    """

    Collects and packages a set of wav files to an array of samples

    Args:
        filenames: File locations as a list
        dest_fs: Sampling frequency to use

    Returns:
        An array of the samples of the files as [N_files x N_samples]

    """
    import numpy as np
    from scipy.io.wavfile import read
    from utils_spaudio import my_resample
    if not (isinstance(filenames, list) or isinstance(filenames, tuple)):
        filenames = [filenames]
    samples = []
    max_len = 0
    for the_filename in filenames:
        fs, new_samples = read(the_filename)
        if dest_fs:
            new_samples = my_resample(new_samples, fs, dest_fs)
        if new_samples.ndim == 1:
            new_samples = np.atleast_2d(new_samples).T
        max_len = max(max_len, new_samples.shape[0])
        samples.append(new_samples)
    for i in range(len(samples)):
        this_len = samples[i].shape[0]
        missing = max_len - this_len
        if missing > 0:
            samples[i] = np.concatenate((samples[i],
                                         np.zeros(
                                             (missing, samples[i].shape[1]),
                                             dtype=samples[i].dtype)))

    out = np.concatenate([samples[i].T for i in range(len(samples))], axis=0)

    return out
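
# A minimal usage sketch (not part of the original module): the wav file names
# below are hypothetical placeholders and utils_spaudio must be importable for
# the resampling step.
if __name__ == '__main__':
    stacked = collect_wavs(['example_air_1.wav', 'example_air_2.wav'],
                           dest_fs=16000)
    print('Collected sample array of shape ' + str(stacked.shape))
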
def viz_net(model_loc,
            air_loc=None,
            nrows=4,
            interactive=False,
            channel=0,
            layer_idx=0,
            speechfile=None):
    """
    Visualization worker. Accepts the model and a set of audio data which can be filtered by the
    network to provide the visualizations. If no input audio data are given, then the filter
    kernels are visualised.

    Args:
        model_loc: The location of the model saved by keras an HDF5 dataset
        air_loc: Location of AIR file .wav
        nrows: Number of rows used in plotting
        interactive: Interactive plotting (waits for you to close the plots)
        channel: Channel of the AIR to do
        layer_idx: =n. The output of the n-th convolutional layer will be used to collect the
        feature maps.
        speechfile: A speech file which will be convolved with the AIR before filtering.

    Returns:

    """

    try:
        from os.path import basename
    except ImportError:
        raise
    from scipy.signal import fftconvolve

    outdir = '/tmp/training_surface_models/' + basename(model_loc).replace(
        '.h5', '') + '/'

    i_made_the_model = False
    try:
        model = load_model(model_loc)
    except ValueError as ME1:
        i_made_the_model = True
        try:
            from ace_discriminative_nets import get_model_speech
            print('Failed to use default load for model ' + model_loc +
                  ' will try for speech cnn because ' + ME1.message)
            model = get_model_speech((500, 161), 7, use_cnn=True)
            model.load_weights(model_loc, by_name=True)
            print('CNN model constructed OK')
        except ValueError as ME2:
            try:
                print('Failed to use cnn model ' + model_loc +
                      ' will try for speech cnn-rnn because ' + ME2.message)
                from ace_discriminative_nets import get_model_speech
                model = get_model_speech((500, 161),
                                         7,
                                         use_cnn=True,
                                         use_rnn=True)
                model.load_weights(model_loc, by_name=True)
                print('CNN-RNN model constructed OK')
            except ValueError as ME3:
                print('Failed to use cnn-rnn model ' + model_loc +
                      ' giving up because ' + ME3.message)
                raise ME1

    if air_loc is None:
        viz_net_individual(model, outdir, nrows=nrows, interactive=interactive)
        return

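    # Find all Conv2D layers so that the requested feature-map output can be tapped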
    conv_layers = []
    conv_layers_idxs = []
    for idx, i in enumerate(model.layers):
        if isinstance(i, Conv2D):
            conv_layers.append(i)
            conv_layers_idxs.append(idx)

    from keras.models import Model

    if layer_idx == -1:
        effective_idx = 1
    else:
        if layer_idx >= len(conv_layers_idxs):
            effective_idx = conv_layers_idxs[-1] + 2
            print(
                'I will assume that you want the next layer after the last conv layer, which I '
                'will assume is a max pooling layer')
        else:
            effective_idx = conv_layers_idxs[layer_idx]
    print('Picking Layer ' + model.layers[effective_idx].name)
    model = Model(inputs=[model.input],
                  outputs=[model.layers[effective_idx].output])

    if speechfile is not None:
        fs_speech, x_speech = wavfile.read(speechfile)
        if x_speech.ndim > 1:
            x_speech = x_speech[:, 0]
    else:
        x_speech = None
        fs_speech = None

    for this_air in air_loc:
        if this_air == 'white':
            suffix = '_white'
            in_shape = model.input_shape[1:]
            x = np.atleast_2d(
                np.random.normal(0, 1., size=np.prod(in_shape) * 4))
        else:
            suffix = '_' + run_command('basename ' + this_air)[0]
            print('Loading: ' + this_air)
            fs, x = wavfile.read(this_air)
            x = x[:, channel]
            if speechfile is not None:
                print('Will convolve with ' + speechfile)
                if not fs_speech == fs:
                    x_speech_effective = my_resample(
                        x_speech[0:int(max_speech_len * fs_speech)], fs_speech,
                        fs)
                else:
                    x_speech_effective = x_speech
                x_speech_effective.setflags(write=1)
                if trim_speech_to is not None:
                    x_speech_effective[int(trim_speech_to * fs_speech):] = 0
                x = fftconvolve(x_speech_effective, x, mode='same')
            else:
                if i_made_the_model:
                    raise AssertionError(
                        'Because the default model-load failed I was going '
                        'to try to construct speech '
                        'models but you did not provide any speech data')
            if x.ndim > 1:
                x = x[:, 0]
            x = np.atleast_2d(x)
        suffix += '_l' + str(layer_idx)
        viz_net_other(model,
                      x,
                      suffix,
                      outdir,
                      nrows=nrows,
                      interactive=interactive,
                      doing_speech=speechfile is not None)
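
# A minimal usage sketch (not part of the original module). The model and AIR
# paths are hypothetical placeholders; the Keras HDF5 model is assumed to have
# been trained on inputs compatible with the wav data.
if __name__ == '__main__':
    viz_net('trained_cnn_model.h5',
            air_loc=['example_air.wav', 'white'],
            nrows=4,
            channel=0,
            layer_idx=0)
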
def read_airs_from_wavs(wav_files,
                        framesize=None,
                        get_pow_spec=True,
                        max_air_len=None,
                        fs=None,
                        forced_fs=None,
                        keep_ids=None,
                        cacheloc='/tmp/',
                        start_at_max=True,
                        read_cached_latest=False,
                        wavform_logpow=False,
                        write_cached_latest=True,
                        max_speech_read=None,
                        max_air_read=None,
                        utt_per_env=1,
                        parse_as_dirctories=True,
                        speech_files=None,
                        save_speech_associations=True,
                        save_speech_examples=10,
                        drop_speech=False,
                        as_hdf5_ds=True,
                        choose_channel=None,
                        no_fex=False,
                        scratchpad='/tmp/',
                        copy_associations_to=None,
                        given_associations=None):
    """

    Given a set of AIR files and additional inforamtion, data for the training of DNNs for
    environment classification are prepared.

    Args:
        wav_files: Location of AIR wav files
        framesize: The framesize to ues
        get_pow_spec: Convert audio to log-power spectrum domain
        max_air_len: The maximum length of the signals (truncate to or pad to)
        fs: The sampling frequency of the wav fiels to expect
        forced_fs: The sampling frequency to convert the data to
        keep_ids: None (not used)
        cacheloc: Location to use for cache reading and saving
        start_at_max: Modify the signals so that the maximum energy sample is at the begiing. (
        can be used to align AIRs)
        read_cached_latest: Read the data from the last saved cache (if nay)
        wavform_logpow: Get the signals in the log-power time domain
        write_cached_latest: Write the collected data in a cache for fast reuse
        max_speech_read: Maximum length of speech signal to read
        max_air_read: maximum aIR length to read up to
        utt_per_env: Number of utternaces to convolve with each AIR
        parse_as_dirctories: Parse the inputs as directiries and not as individual fiels
        speech_files: Speec files of locations
        save_speech_associations: Save the speech associations with the corresponding AIRs
        save_speech_examples: Enable the saving of examples of the reverberant speech created
        drop_speech: Do not include the speech samples in the saving of the cache or in the RAM.
        Keep only the training data arrays
        as_hdf5_ds: Keep the data as HDF5 datasets on disk. (Reduces RAM usage a lot)
        choose_channel: Channels to use for each AIR
        no_fex: Skip the data processign phase and just return the raw singals
        scratchpad: Location to use for temporary saving
        copy_associations_to: Save a copy of the speech-aIR associations here
        given_associations: Provided associatiosn between speech files and AIRs. This can be used
        in the case where you want to use specific speech samples for specific AIRs

    Returns:
        (X, None), Sample_names, None,
        (AIRs, Speech, Reverberant_speech),
        (Group_name, Groups), Number_of_utternaces_convolved_with_each_AIR

    """
    try:
        from os.path import isfile, basename
    except ImportError:
        raise
    from scipy.signal import fftconvolve
    import numpy as np
    from h5py import File
    from scipy.io import wavfile
    from utils_spaudio import my_resample, write_wav
    from utils_base import find_all_ft, run_command
    from random import sample
    import pandas as pd
    from random import randint
    from time import time

    run_command('mkdir -p ' + cacheloc)
    latest_file = cacheloc + '/training_test_data_wav.h5'
    timestamp = str(time())
    filename_associations = scratchpad + '/air_speech_associations_' + timestamp + '.csv'
    base_examples_dir = scratchpad + '/feature_extraction_examples/'
    if keep_ids is not None:
        raise AssertionError('No ids exist in this context')
    if speech_files is None:
        utt_per_env = 1
        if save_speech_associations:
            print(
                'There is no speech to save in associations, setting to false')
            save_speech_associations = False
        if save_speech_examples:
            print(
                'There is no speech to save audio for, setting to 0 examples')
            save_speech_examples = 0

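    # Try to serve the request from the latest cached HDF5 file (if cache reading is enabled)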
    try:
        hf = None
        if isfile(latest_file) and read_cached_latest:
            print('Reading :' + latest_file)
            hf = File(latest_file, 'r')
            if as_hdf5_ds:
                x = hf['x']
                ids = hf['ids']
                airs = hf['airs']
                utt_per_env = np.array(hf['utts'])
                rev_speech = hf['rev_names']
                clean_speech = hf['clean_speech']
                print('Done creating handles to : ' + latest_file)
            else:
                utt_per_env = np.array(hf['utts'])
                x = np.array(hf.get('x'))
                ids = np.array(hf.get('ids'))
                airs = np.array(hf.get('airs'))
                rev_speech = np.array(hf.get('rev_names'))
                clean_speech = np.array(hf.get('clean_speech'))
                print('Done reading : ' + latest_file)
            if given_associations is not None:
                print(
                    '! I read the cache so the given associations were not used'
                )
            if copy_associations_to is not None:
                print(
                    '! I read the cache so the associations could not be saved at '
                    + copy_associations_to)
            return (x, None), ids, None, (airs, clean_speech,
                                          rev_speech), utt_per_env
    except (ValueError, KeyError) as ME:
        print('Tried to read ' + latest_file + ' but failed with ' +
              ME.message)
        if hf is not None:
            hf.close()

    if given_associations is not None:
        print('You gave me speech associations, Speech: ' +
              str(len(given_associations['speech'])) +
              ' entries and Offsets: ' +
              str(len(given_associations['offsets'])) + ' entries')

    ids = None
    x = None
    x_speech = None
    x_rev_speech = None

    if forced_fs is None:
        forced_fs = fs
    resample_op = lambda x: x
    if not forced_fs == fs:
        resample_op = lambda x: np.array(
            my_resample(np.array(x.T, dtype=float), fs, forced_fs)).T

    if max_air_read is not None:
        if fs is None:
            raise AssertionError('Cannot work with max_air_read without fs')
        max_air_read_samples = int(np.ceil(fs * max_air_read))
    if max_speech_read is not None:
        if fs is None:
            raise AssertionError('Cannot work with max_speech_read without fs')
        max_speech_read_samples = int(np.ceil(fs * max_speech_read))
    else:
        max_speech_read_samples = None

    if parse_as_dirctories:
        if not type(wav_files) is list:
            wav_files = [wav_files]
        wav_files = find_all_ft(wav_files, ft='.wav', find_iname=True)
    if speech_files is not None:
        if not type(speech_files) is list:
            speech_files = [speech_files]
        speech_files = find_all_ft(speech_files, ft='.wav', find_iname=True)

    if save_speech_examples:
        run_command('rm -r ' + base_examples_dir)
        run_command('mkdir -p ' + base_examples_dir)

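    # Replicate each AIR (and its chosen channel) utt_per_env times and build matching sample names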
    associations = []
    save_counter = 0
    all_names = [
        basename(i).replace('.wav', '') + '_' + str(j) for i in wav_files
        for j in range(utt_per_env)
    ]
    if type(choose_channel) is list:
        choose_channel = [
            i for i in choose_channel for _ in range(utt_per_env)
        ]
    wav_files = [i for i in wav_files for _ in range(utt_per_env)]
    offsets = []
    for i, this_wav_file in enumerate(wav_files):
        if False and speech_files is not None:
            print "Reading: " + this_wav_file + " @ " + str(
                i + 1) + " of " + str(len(wav_files)),
        names = [all_names[i]]
        this_fs, airs = wavfile.read(this_wav_file)
        airs = airs.astype(float)
        if airs.ndim > 1:
            if choose_channel is not None:
                if type(choose_channel) is list:
                    airs = airs[:, choose_channel[i]]
                    names[0] += '_ch' + str(choose_channel[i])
                else:
                    airs = airs[:, choose_channel]
                    names[0] += '_ch' + str(choose_channel)
            else:
                names = [
                    names[0] + '_' + str(ch_id)
                    for ch_id in range(airs.shape[1])
                ]
            airs = airs.T
        airs = np.atleast_2d(airs)
        airs /= np.repeat(np.atleast_2d(abs(airs).max()).T, airs.shape[1],
                          1).astype(float)
        if airs.shape[0] > 1 and given_associations is not None:
            raise AssertionError(
                'Cannot work out given associations for multichannel airs')
        this_speech_all = []
        this_rev_speech_all = []
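        # For each AIR channel, pick a speech file (randomly or from the given
        # associations), read it and convolve it with the AIR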
        if speech_files is not None:
            for ch_id in range(airs.shape[0]):
                if given_associations is None:
                    chosen_file = sample(range(len(speech_files)), 1)[0]
                    this_speech_file = speech_files[chosen_file]
                else:
                    chosen_file = given_associations['speech'][i]
                    this_speech_file = chosen_file
                associations.append(chosen_file)
                this_speech_fs, this_speech = wavfile.read(this_speech_file)
                if this_speech.ndim > 1:
                    raise AssertionError(
                        'Can\'t deal with multichannel speech in this context')
                if not this_speech_fs == this_fs:
                    this_speech = my_resample(this_speech, this_speech_fs,
                                              this_fs)
                max_offset_for_check = None
                if max_speech_read_samples is not None:
                    max_offset_for_check = this_speech.size - max_speech_read_samples
                    offset = randint(
                        0, this_speech.size - max_speech_read_samples)
                    this_speech = this_speech[offset:offset +
                                              max_speech_read_samples]
                else:
                    offset = 0
                if given_associations is not None:
                    offset = given_associations['offsets'][i]
                    if max_speech_read_samples is not None:
                        if offset >= max_offset_for_check:
                            raise AssertionError(
                                'Invalid offset from given associations, got '
                                + str(offset) + ' expected max is ' +
                                str(this_speech.size -
                                    max_speech_read_samples))

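                # Trim the leading and trailing zeros of this AIR channel before the convolution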
                conv_air = np.array(airs[ch_id, :])
                nonzero_idxs = np.where(~(conv_air == 0))[-1]
                conv_air = conv_air[nonzero_idxs[0]:nonzero_idxs[-1]]

                # Making convolution
                this_rev_speech = fftconvolve(this_speech, conv_air, 'same')
                #

                dp_arival = np.argmax(abs(conv_air))
                this_rev_speech = this_rev_speech[dp_arival:]
                if dp_arival > 0:
                    this_rev_speech = np.concatenate(
                        (this_rev_speech,
                         np.zeros(dp_arival, dtype=this_rev_speech.dtype)))

                this_speech = np.atleast_2d(this_speech)
                this_rev_speech = np.atleast_2d(this_rev_speech)
                this_speech_all.append(this_speech)
                this_rev_speech_all.append(this_rev_speech)

                offsets.append(offset)
                if save_speech_examples >= save_counter:
                    save_names = [
                        basename(this_wav_file).replace('.wav', '') + '_air_' +
                        str(offset) + '.wav',
                        basename(this_wav_file).replace('.wav', '') +
                        '_clean_speech_' + str(offset) + '.wav',
                        basename(this_wav_file).replace('.wav', '') +
                        '_rev_speech_' + str(offset) + '.wav'
                    ]
                    for examples in range(len(save_names)):
                        save_names[examples] = base_examples_dir + save_names[
                            examples]
                    write_wav(save_names[0], this_fs, airs[ch_id, :])
                    write_wav(save_names[1], this_fs, this_speech.flatten())
                    write_wav(save_names[2], this_fs,
                              this_rev_speech.flatten())
                    save_counter += 1
            this_speech = np.concatenate(this_speech_all, axis=0)
            this_rev_speech = np.concatenate(this_rev_speech_all, axis=0)

        if not this_fs == fs:
            raise AssertionError('Your sampling rates are not consistent')
        if i > 0:
            ids = np.concatenate((ids, names))
        else:
            ids = names

        if max_air_read is not None:
            airs = airs[:, 0:max_air_read_samples]
        if False and speech_files is not None:
            print("Got " + str(airs.shape))
        airs = resample_op(airs)
        if airs.ndim < 2:
            airs = np.atleast_2d(airs)
        # print('Done resampling')
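        # Zero-pad either the running matrix or the new rows to a common length and stack them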
        if i > 0:
            if x.shape[1] < airs.shape[1]:
                npads = -x.shape[1] + airs.shape[1]
                x = np.concatenate((x, np.zeros(
                    (x.shape[0], npads)).astype(x.dtype)),
                                   axis=1)
                x = np.concatenate((x, airs), axis=0)
            else:
                if x.shape[1] > airs.shape[1]:
                    npads = x.shape[1] - airs.shape[1]
                    airs = np.concatenate(
                        (airs, np.zeros(
                            (airs.shape[0], npads)).astype(airs.dtype)),
                        axis=1)
                x.resize((x.shape[0] + airs.shape[0], x.shape[1]),
                         refcheck=False)
                x[-airs.shape[0]:, :] = np.array(airs)

            if speech_files is not None:
                if x_speech.shape[1] < this_speech.shape[1]:
                    npads = -x_speech.shape[1] + this_speech.shape[1]
                    x_speech = np.concatenate(
                        (x_speech, np.zeros((x_speech.shape[0], npads)).astype(
                            x_speech.dtype)),
                        axis=1)
                    x_speech = np.concatenate((x_speech, this_speech), axis=0)
                else:
                    if x_speech.shape[1] > this_speech.shape[1]:
                        npads = x_speech.shape[1] - this_speech.shape[1]
                        this_speech = np.concatenate(
                            (this_speech,
                             np.zeros((this_speech.shape[0], npads)).astype(
                                 this_speech.dtype)),
                            axis=1)
                    x_speech.resize((x_speech.shape[0] + this_speech.shape[0],
                                     x_speech.shape[1]),
                                    refcheck=False)
                    x_speech[-this_speech.shape[0]:, :] = this_speech

                if x_rev_speech.shape[1] < this_rev_speech.shape[1]:
                    npads = -x_rev_speech.shape[1] + this_rev_speech.shape[1]
                    x_rev_speech = np.concatenate(
                        (x_rev_speech, np.zeros(
                            (x_rev_speech.shape[0], npads)).astype(
                                x_rev_speech.dtype)),
                        axis=1)
                    x_rev_speech = np.concatenate(
                        (x_rev_speech, this_rev_speech), axis=0)
                else:
                    if x_rev_speech.shape[1] > this_rev_speech.shape[1]:
                        npads = x_rev_speech.shape[1] - this_rev_speech.shape[1]
                        this_rev_speech = np.concatenate(
                            (this_rev_speech,
                             np.zeros(
                                 (this_rev_speech.shape[0], npads)).astype(
                                     this_rev_speech.dtype)),
                            axis=1)
                    x_rev_speech.resize(
                        (x_rev_speech.shape[0] + this_rev_speech.shape[0],
                         x_rev_speech.shape[1]),
                        refcheck=False)
                    x_rev_speech[
                        -this_rev_speech.shape[0]:, :] = this_rev_speech
        else:
            x = np.array(airs)
            if speech_files is not None:
                x_speech = np.array(this_speech)
                x_rev_speech = np.array(this_rev_speech)

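    # Save the AIR-speech associations (and speech offsets) as a CSV for reproducibility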
    if save_speech_associations:
        from utils_base import run_command
        df = pd.DataFrame({
            'air':
            wav_files,
            'speech':
            np.array(speech_files)[associations]
            if given_associations is None else given_associations['speech'],
            'offsets':
            offsets
            if given_associations is None else given_associations['offsets']
        })

        df.to_csv(filename_associations, index=False)
        print('Saved: ' + filename_associations +
              ('' if given_associations is None else
               ' which was created from the given associations'))
        if copy_associations_to is not None:
            run_command('cp ' + filename_associations + ' ' +
                        copy_associations_to)
            print('Saved: ' + copy_associations_to)

    if fs is not None:
        print('Got ' + str(x.shape[0]) + ' AIRs of duration ' +
              str(x.shape[1] / float(fs)))
    else:
        print('Got ' + str(x.shape[0]) + ' AIRs of length ' + str(x.shape[1]))

    if speech_files is not None:
        proc_data = x_rev_speech
    else:
        proc_data = x

    if drop_speech:
        x_rev_speech = []
        x_speech = []
        x = []

    if no_fex:
        x_out = None
        print('Skipping feature extraction')
    else:
        x_out = data_post_proc(np.array(proc_data), forced_fs, start_at_max,
                               framesize, get_pow_spec, max_air_len,
                               wavform_logpow)

        print('Left with AIR feature data of shape ' + str(x_out.shape))

    ids = ids.astype(str)

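    # Write the collected data to an HDF5 cache and, if requested, re-open it as on-disk datasets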
    wrote_h5 = False
    if write_cached_latest:
        try:
            hf = File(latest_file, 'w')
            if no_fex:
                hf.create_dataset('x', data=[])
            else:
                hf.create_dataset('x', data=x_out)
            hf.create_dataset('y', data=[])
            hf.create_dataset('ids', data=ids)
            hf.create_dataset('class_names', data=[])
            hf.create_dataset('airs', data=x)
            hf.create_dataset('utts', data=utt_per_env)
            if speech_files is not None:
                hf.create_dataset('clean_speech', data=x_speech)
                hf.create_dataset('rev_names', data=x_rev_speech)
            else:
                hf.create_dataset('clean_speech', data=[])
                hf.create_dataset('rev_names', data=[])
            hf.close()
            wrote_h5 = True
            print('Wrote: ' + str(latest_file))
        except IOError as ME:
            print('Cache writing failed with ' + str(ME.message))

        if (not wrote_h5) and as_hdf5_ds:
            raise AssertionError('Could not provide data in correct format')
        if as_hdf5_ds:
            hf = File(latest_file, 'r')
            x_out = hf['x']
            ids = hf['ids']
            x = hf['airs']
            x_speech = hf['clean_speech']
            x_rev_speech = hf['rev_names']
            # hf.close()

    return (x_out, None), ids, None, (x, x_speech, x_rev_speech), utt_per_env
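
# A minimal usage sketch (not part of the original module). The directories are
# hypothetical placeholders and 16 kHz wav data are assumed; with
# as_hdf5_ds=False the returned arrays are kept in RAM.
if __name__ == '__main__':
    (features, _), sample_names, _, signals, n_utts = read_airs_from_wavs(
        '/data/airs/',
        fs=16000,
        framesize=320,
        max_air_len=1.0,
        speech_files='/data/speech/',
        utt_per_env=2,
        as_hdf5_ds=False)
    air_mat, clean_speech_mat, rev_speech_mat = signals
    print('Feature array shape: ' + str(features.shape))
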
                                '_result_modeled_air_baselines.pdf')
score_names, scores = acenvmodel.get_eval_scores(verbose=True)
acenvmodel.plot_modeling_results(saveloc=results_dir + '/' + the_name +
                                 '_result_modeled_air_hat.pdf',
                                 interactive=interactive)

if not early_only:
    rir = acenv.impulse_response[:, do_channel]
    rir_hat = acenvmodel.air_reconstructed_from_model.flatten()
    try:
        fs_speech, s = wavfile.read(speech_loc)
    except IOError as ME:
        print('Could not read speech file ' + speech_loc + ' with: ' +
              ME.message)
        exit(0)
    s = (s[0:fs_speech * 10].astype('float128') /
         s[0:fs_speech * 10].max()).astype(float)
    if not model_fs == fs_speech:
        s = my_resample(s, fs_speech, model_fs)
    rev = np.convolve(s, rir)
    rev_hat = np.convolve(s, rir_hat)

    def write_the_wav(filename, x):
        write_wav(filename, model_fs, x)

    write_the_wav(results_dir + '/' + the_name + '_clean.wav', s)
    write_the_wav(results_dir + '/' + the_name + '_rir_hat.wav', rir_hat)
    write_the_wav(results_dir + '/' + the_name + '_rir.wav', rir)
    write_the_wav(results_dir + '/' + the_name + '_rev.wav', rev)
    write_the_wav(results_dir + '/' + the_name + '_rev_hat.wav', rev_hat)
    def __init__(self,
                 name='',
                 filename='',
                 samples=None,
                 sampling_freq=0,
                 keep_channel=None,
                 max_allowed_silence=0.001,
                 is_simulation=None,
                 silent_samples_offset=False,
                 matlab_engine=None):
        """

        Args:
            name: Used as the label for the object
            filename: The filename for the measured/simualted AIR
            samples: The samples of the measured/simulated AIR
            sampling_freq (int): The sampling frequency for the AIR
        """
        self.name = name
        self.sampling_freq = 0
        self.impulse_response = np.array([])
        self.room_name = None
        self.room_type = None
        self.room_dimensions = (None, None, None)
        self.rec_position = [(None, None, None)]
        self.src_position = (None, None, None)
        self.is_simulation = is_simulation
        self.from_database = None
        self.receiver_name = None
        self.receiver_config = None
        self.known_room = False
        self.filename = filename
        self.nchannels = 0

        if (len(filename) == 0) & (samples is None):
            raise NameError(getfname() + ':FilenameOrSamplesRequired')

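        # Either use the provided samples directly or read the AIR from file,
        # resampling if a target sampling frequency was given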
        if samples is not None:
            self.impulse_response = samples
            if sampling_freq <= 0:
                raise AssertionError('SamplingFreqCannotBe0orNegative')
            self.sampling_freq = sampling_freq
            if keep_channel is not None:
                self.impulse_response = self.get_channel(keep_channel)
        else:
            self.sampling_freq, self.impulse_response = wavfile.read(
                self.filename)
            if keep_channel is not None:
                self.impulse_response = self.get_channel(keep_channel)
            self.impulse_response = self.impulse_response.astype(
                float) / np.max(np.abs(self.impulse_response))
            if sampling_freq > 0:
                self.impulse_response = np.array(
                    my_resample(np.array(self.impulse_response),
                                self.sampling_freq,
                                sampling_freq,
                                matlab_eng=matlab_engine))
                self.sampling_freq = sampling_freq
        try:
            if self.impulse_response.ndim == 1:
                self.impulse_response = column_vector(self.impulse_response)
        except AttributeError:
            pass
        if len(filename) > 0:
            self.add_room_info()
            self.add_receiver_info()
        if self.impulse_response.ndim < 2:
            self.nchannels = 1
        else:
            self.nchannels = self.impulse_response.shape[1]

        scale_by = float(abs(self.impulse_response).max())
        if scale_by > 0:
            self.impulse_response = self.impulse_response / scale_by

        if self.impulse_response is not None:
            # Compute the silence allowance here, now that self.sampling_freq
            # has been set from the file or the given sampling_freq
            max_allowed_silence_samples = int(
                np.ceil(max_allowed_silence * self.sampling_freq))
            start_sample = self.impulse_response.shape[0]
            for i in range(self.impulse_response.shape[1]):
                start_sample = min(
                    start_sample,
                    max(
                        0,
                        np.nonzero(self.impulse_response[:, i])[0][0] -
                        max_allowed_silence_samples))
            if start_sample > 0 and silent_samples_offset:
                self.impulse_response = self.impulse_response[start_sample:, :]
                print('Offsetted AIR by ' + str(start_sample) + ' samples')
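
    # A hedged construction sketch (not part of the original class). The class
    # name is not shown in this excerpt, so 'AIR' below is a hypothetical
    # stand-in and the wav path is a placeholder:
    #     air_obj = AIR(name='lecture_room_1',
    #                   filename='example_air.wav',
    #                   keep_channel=0,
    #                   silent_samples_offset=True)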
    args = parser.parse_args()
    print('Args given : ' + str(args))

    if args.nodisplay:
        use('Agg')
    import matplotlib.pyplot as plt

    doing_e2e = args.e2e

    file_loc = args.file_loc
    speech_file = args.speech_file

    savelocation = args.saveloc + '/gan_acenv_' + get_timestamp() + '/'
    if speech_file is not None:
        fs, speech_samples = wavfile.read(speech_file)
        speech_samples = my_resample(speech_samples, fs, global_fs)
        speech_samples = speech_samples[0:int(global_fs * 6)]
        speech_samples = speech_samples / float(np.abs(speech_samples).max())
    else:
        speech_samples = None

    if doing_e2e:
        from fe_utils import get_ace_xy
        from utils_spaudio import align_max_samples

        if file_loc is None:
            file_loc = '../results_dir/ace_h5_info.h5'
        model_framesize = 64
        max_air_len_import = air_len
        wavform_logpow = False
        get_pow_spec = False