Example #1
def calc_potential_max(stim_folder, noise_filepath, out_dir):
    max_wav_samp = 0
    max_wav_rms = 0
    wavs = globDir(stim_folder, '*.wav')
    n_files = len(wavs)
    for ind, wav in enumerate(wavs):
        x, fs, enc = sndio.read(wav)
        max_wav_samp = np.max([max_wav_samp, np.max(np.abs(x))])
        #max_wav_rms = np.max([max_wav_rms, np.sqrt(np.mean(x**2))])
        level = asl_P56(x, fs, 16.)[0]
        max_wav_rms = np.max([max_wav_rms, level])
        print(
            f"Calculated level of {Path(wav).name} ({ind+1}/{n_files}): {level}"
        )
    x, fs, enc = sndio.read(noise_filepath)
    # noise_rms = np.sqrt(np.mean(x**2))
    print(f"Calculating level of {Path(noise_filepath).name}")
    noise_rms, _, _ = asl_P56(x, fs, 16.)
    print(f"Calculated level of {Path(noise_filepath).name}: {noise_rms}")
    max_noise_samp = max(np.abs(x))

    snr = -15.0
    snr_fs = 10**(-snr / 20)
    max_noise_samp *= max_wav_rms / noise_rms
    max_sampl = max_wav_samp + (max_noise_samp * snr_fs)
    reduction_coef = 1.0 / max_sampl
    np.save(os.path.join(out_dir, "reduction_coef.npy"), reduction_coef)
Example #2
def gen_rms_peak(files, OutRMSDir, OutPeakDir):
    rmsFiles = []
    peakFiles = []
    for file in files:
        head, tail = os.path.split(file)
        tail = os.path.splitext(tail)[0]
        tail = tail + "_rms.npy"
        dir_must_exist(OutRMSDir)
        rmsFilepath = os.path.join(OutRMSDir, tail)
        print("Generating: " + rmsFilepath)
        y, fs, _ = sndio.read(file)
        y_rms = window_rms(y, round(0.02 * fs))
        np.save(rmsFilepath, y_rms)
        rmsFiles.append(rmsFilepath)

        head, tail = os.path.split(file)
        tail = os.path.splitext(tail)[0]
        tail = tail + "_peak.npy"
        dir_must_exist(OutPeakDir)
        peakFilepath = os.path.join(OutPeakDir, tail)
        print("Generating: " + peakFilepath)
        peak = np.abs(y).max()
        np.save(peakFilepath, peak)
        peakFiles.append(peakFilepath)
    return rmsFiles, peakFiles
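Neither window_rms nor dir_must_exist is defined in these snippets. For reference, a minimal sketch of the moving-window RMS that window_rms presumably computes (the name and signature are taken from the call sites above; the implementation itself is an assumption):

import numpy as np

def window_rms(x, window_size):
    # Running RMS: square, average over a sliding window, square-root.
    power = np.convolve(x ** 2, np.ones(window_size) / window_size,
                        mode='valid')
    return np.sqrt(power)

Called as window_rms(y, round(0.02 * fs)), this yields an RMS envelope over 20 ms windows.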
Example #3
def calc_spectrum(files, silences, fs=44100, plot=False):
    window = 4096
    sentenceLen = []
    sentenceFFT = []

    print("Calculating LTASS...")
    for ind, sentenceList in enumerate(files):
        for ind2, file in enumerate(sentenceList):
            x, fs, _ = sndio.read(file)
            f, t, Zxx = sgnl.stft(x,
                                  window=np.ones(window),
                                  nperseg=window,
                                  noverlap=0)
            sil = silences[ind * 10 + ind2]
            sTemp = np.zeros((sil.shape[0], t.size), dtype=bool)
            for ind3, s in enumerate(sil):
                sTemp[ind3, :] = np.logical_and(t > s[0], t < s[1])
            invalidFFT = np.any(sTemp, axis=0)
            sentenceFFT.append(np.abs(Zxx[:, ~invalidFFT]))
            sentenceLen.append(x.size)
    sentenceLen = np.array([sentenceLen]).T
    sentenceLen = sentenceLen / sentenceLen.max()
    sentenceFFT = [x * sentenceLen[i] for i, x in enumerate(sentenceFFT)]
    sentenceFFT = np.concatenate([x.T for x in sentenceFFT])

    grandAvgFFT = np.mean(sentenceFFT, axis=0)
    grandAvgFFT = grandAvgFFT / grandAvgFFT.max()
    print("Fitting filter to LTASS...")
    b = sgnl.firls(2049, np.linspace(0, 1, 2049)[1:], grandAvgFFT[1:])
    if plot:
        plt.semilogy(np.abs(sgnl.freqz(b)[1]))
        plt.plot(np.linspace(0, 512, 2049), grandAvgFFT)
        plt.show()
    return b
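The coefficients returned by calc_spectrum form an FIR filter whose magnitude response tracks the long-term average speech spectrum (LTASS). One plausible use, not shown in the snippet, is shaping white noise to match the speech spectrum; a sketch under that assumption:

import numpy as np
import scipy.signal as sgnl

# b = calc_spectrum(files, silences)   # LTASS filter from the example above
white = np.random.randn(44100 * 60)    # 60 s of white noise at 44.1 kHz
shaped = sgnl.lfilter(b, 1.0, white)   # speech-shaped noise
shaped /= np.abs(shaped).max()         # normalise to avoid clipping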
Example #4
def calc_potential_max(wavs, noise_filepath, out_dir, out_name):
    max_wav_samp = 0
    max_wav_rms = 0
    for wav in wavs:
        x, fs, enc = sndio.read(wav)
        max_wav_samp = np.max([max_wav_samp, np.max(np.abs(x))])
        max_wav_rms = np.max([max_wav_rms, np.sqrt(np.mean(x**2))])
    x, fs, enc = sndio.read(noise_filepath)
    noise_rms = np.sqrt(np.mean(x**2))
    max_noise_samp = max(np.abs(x))

    snr = -5.
    snr_fs = 10**(-snr / 20)
    max_noise_samp *= max_wav_rms / noise_rms
    max_sampl = max_wav_samp + (max_noise_samp * snr_fs)
    reduction_coef = 1.0 / max_sampl
    np.save(os.path.join(out_dir, "{}.npy".format(out_name)), reduction_coef)
    return reduction_coef
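In both versions of calc_potential_max the SNR is converted from decibels to a linear amplitude factor with snr_fs = 10**(-snr / 20), so a negative SNR scales the RMS-matched noise up relative to the speech. A short worked example of the arithmetic (the peak values are hypothetical):

import numpy as np

snr = -5.0
snr_fs = 10 ** (-snr / 20)          # ~1.778: noise sits 5 dB above the speech level
max_wav_samp = 0.9                  # hypothetical worst-case speech sample
max_noise_samp = 0.8                # hypothetical worst-case RMS-matched noise sample
max_sampl = max_wav_samp + max_noise_samp * snr_fs
reduction_coef = 1.0 / max_sampl    # ~0.43; scaling by this keeps any mix in [-1, 1]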
Example #5
def main():
    wavs = globDir('./', 'stim.wav')
    for wav in wavs:
        x, fs, enc, fmt = sndio.read(wav, return_format=True)
        y = x[:, :2]
        head, tail = os.path.splitext(wav)
        out_filepath = "{0}_old{1}".format(head, tail)
        os.rename(wav, out_filepath)
        sndio.write(wav, y, rate=fs, format=fmt, enc=enc)
Example #6
    def level_calc(args):
        ind, wavfile = args
        x, fs, _ = sndio.read(wavfile)
        # level = asl_P56(x, fs, 16.)[0]
        level = rms_no_silences(x, fs, -30.)

        print(
            f"Calculated level of {Path(wavfile).name} ({ind+1}/{n_files}): {level}"
        )
        return level
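rms_no_silences is not defined in any of these snippets. Judging by the call sites (signal, sample rate, and a threshold in dB), it presumably measures RMS while discarding low-level frames; a rough sketch under that assumption, with the frame length chosen arbitrarily:

import numpy as np

def rms_no_silences(x, fs, thresh_db):
    # RMS over 20 ms frames, ignoring frames below thresh_db dBFS.
    frame = int(0.02 * fs)
    n = (x.size // frame) * frame
    frames = x[:n].reshape(-1, frame)
    frame_rms = np.sqrt(np.mean(frames ** 2, axis=1))
    frame_db = 20 * np.log10(frame_rms + np.finfo(float).eps)
    active = frames[frame_db > thresh_db]
    return np.sqrt(np.mean(active ** 2))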
Example #7
def gen_rms(file, OutDir):
    head, tail = os.path.split(file)
    tail = os.path.splitext(tail)[0]
    tail = tail + "_env.npy"
    dir_must_exist(OutDir)
    rmsFilepath = os.path.join(OutDir, tail)
    print("Generating: " + rmsFilepath)
    y, fs, _ = sndio.read(file)

    y = y[:, 0]
    y_rms = window_rms(y, round(0.02 * fs))
    np.save(rmsFilepath, y_rms)
    return rmsFilepath
Example #8
def main():
    '''
    Add a trigger channel to each stimulus wav file found in ./stimulus
    '''
    wavs = globDir("./stimulus", "*.wav")
    for wav in wavs:
        x, fs, enc, fmt = sndio.read(wav, return_format=True)
        idx = np.arange(x.shape[0])
        y = np.vstack([x, x, np.zeros(x.shape[0])]).T
        trigger = gen_trigger(idx, 2., 0.01, fs)
        y[:, 2] = trigger
        wav_out = os.path.splitext(wav)[0] + "_trig.wav"
        sndio.write(wav_out, y, rate=fs, format=fmt, enc=enc)
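gen_trigger appears throughout these examples but is never shown. The call gen_trigger(idx, 2., 0.01, fs) suggests a pulse train defined on absolute sample indices (so triggers stay aligned across concatenated chunks), with a rate in Hz and a pulse width in seconds. A hypothetical reconstruction along those lines:

import numpy as np

def gen_trigger(idx, freq, width, fs):
    # High for `width` seconds, repeating `freq` times per second,
    # evaluated at the absolute sample indices in `idx`.
    period = fs / freq
    return ((idx % period) < width * fs).astype(float)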
Example #9
def loadAudio(filename):
    """
    loadAudio: loads audio data from file using pysndfile

    Note that, by default pysndfile converts the samples into floating point
    numbers and rescales them in the range [-1, 1]. This is avoided by specifying
    the option dtype=np.int16 which keeps both the original data type and range
    of values.
    """
    sndobj = sndio.read(filename, dtype=np.int16)
    samplingrate = sndobj[1]
    samples = np.array(sndobj[0])
    return samples, samplingrate
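A quick demonstration of the two behaviours this docstring describes (the file name is hypothetical):

import numpy as np
from pysndfile import sndio

x_float, fs, _ = sndio.read('speech.wav')                  # floats in [-1, 1]
x_int, fs, _ = sndio.read('speech.wav', dtype=np.int16)    # raw 16-bit values
# Multiplying the float samples by np.iinfo(np.int16).max approximately
# recovers the raw 16-bit range, as several examples below do.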
Example #10
def load_sphere(filepath):
    """
    Loads the utterance samples from a file.

    Source: lab3 of DT2119 Speech and Speaker Recognition at KTH, by prof. Giampiero Salvi (slightly modified)

    :param filepath: path to the utterance file (.wav)
    :return: (samples, sample rate), where samples is a numpy array of shape (n_samples,)
    """
    snd_obj = sndio.read(filepath, dtype=np.int16)
    samples = np.array(snd_obj[0])
    sample_rate = snd_obj[1]
    return samples, sample_rate
Example #11
def load_audio(file_path):
    """Loads audio data from wav file using pysndfile.

    Args:
        file_path: Path to a wav file.

    Returns:
        A tuple containing the samples and the sampling rate of the
        wav file, in this order.
    """
    data = sndio.read(file_path)
    sampling_rate = data[1]
    samples = np.array(data[0], dtype=np.float32)
    return samples, sampling_rate
Example #12
def gen_rms(files, OutDir):
    rmsFiles = []
    OutPeakDir = './stimulus/peak'
    for sentenceList in files:
        for file in sentenceList:
            head, tail = os.path.split(file)
            tail = os.path.splitext(tail)[0]
            tail_rms = tail + "_rms.npy"
            dir_must_exist(OutDir)
            rmsFilepath = os.path.join(OutDir, tail_rms)
            print("Generating: " + rmsFilepath)
            y, fs, _ = sndio.read(file)
            y_rms = calc_rms(y, round(0.02 * fs))
            np.save(rmsFilepath, y_rms)
            rmsFiles.append(rmsFilepath)

            tail_peak = tail + "_peak.npy"
            dir_must_exist(OutPeakDir)
            peakFilepath = os.path.join(OutPeakDir, tail_peak)
            print("Generating: " + peakFilepath)
            peak = np.abs(y).max()
            np.save(peakFilepath, peak)
    return rmsFiles
Example #13
def main():
    '''
    Apply calibration coefficients to the calibration stimuli and write the
    calibrated wav files to ./out/calibrated_stim/
    '''
    fs = 44100
    f = 1000.0
    n = np.arange(fs * 60 * 5)
    y = np.sin(2 * np.pi * f * n / fs)
    coef = np.load('./out/calibration_coefficients/click_cal_coef.npy')
    y *= coef
    dir_must_exist('./out/calibrated_stim/')
    sndio.write("./out/calibrated_stim/1k_tone.wav",
                y,
                fs,
                format='wav',
                enc='pcm16')
    for stim in ('da', 'mat', 'story'):
        coef = np.load(
            './out/calibration_coefficients/{}_cal_coef.npy'.format(stim))
        y, fs, enc = sndio.read('./out/stimulus/{}_cal_stim.wav'.format(stim))
        sndio.write('./out/calibrated_stim/{}_cal_stim.wav'.format(stim),
                    y * coef,
                    fs,
                    format='wav',
                    enc='pcm16')
Example #14
 def playStimulus(self, wav):
     '''
     Output audio stimulus from numpy array
     '''
     self.newResp = False
     self.socketio.emit("stim_playing", namespace="/main")
     x, fs, _ = sndio.read(wav)
     if self.participant.parameters['hl_sim_active']:
         y = apply_hearing_loss_sim(x, fs)
     else:
         y = x
     # Play audio
     if not self.dev_mode:
         sd.play(y, fs, blocking=True)
     else:
         self.play_wav('./da_stim/DA_170.wav', '')
     self.socketio.emit("stim_done", namespace="/main")
Example #15
    def loadStimulus(self):

        # Get folder path of all lists in the list directory
        lists = next(os.walk(self.listDir))[1]
        lists.pop(lists.index("demo"))
        # Don't reload any lists that have already been loaded
        pop = [lists.index(x) for x in self.loadedLists]
        for i in sorted(pop, reverse=True):
            del lists[i]
        # Randomly select n lists
        inds = self.inds
        # random.shuffle(inds)
        # Pick first n shuffled lists
        for ind in inds:
            # Get filepaths to the audiofiles and word csv file for the current
            # list
            listAudiofiles = globDir(os.path.join(self.listDir, lists[ind]), "*.wav")
            listCSV = globDir(os.path.join(self.listDir, lists[ind]), "*.csv")
            levels = globDir(os.path.join(self.listDir, lists[ind]), "*.mat")

            with open(listCSV[0]) as csv_file:
                csv_reader = csv.reader(csv_file)
                # Allocate empty lists to store audio samples, RMS and words of
                # each list sentence
                self.lists.append([])
                self.listsRMS.append([])
                self.listsString.append([])
                # Get data for each sentence
                for fp, words, level_file in zip(listAudiofiles, csv_reader, levels):
                    # Read in audio file and calculate its RMS
                    x, self.fs, _ = sndio.read(fp)
                    logger.info(f"Calculating level for {Path(fp).name}")
                    # x_rms, _, _ = asl_P56(x, self.fs, 16.)
                    x_rms = rms_no_silences(x, self.fs, -30.)
                    self.lists[-1].append(x)
                    self.listsRMS[-1].append(x_rms)
                    self.listsString[-1].append(words)

        # Number of trials to split between adaptive tracks
        n = len(self.lists[0])*len(inds)
        # Number of adaptive tracks active
        tn = len(self.adaptiveTracks)
        self.trackOrder = list(np.repeat(np.arange(tn), n // tn))
        random.shuffle(self.trackOrder)

        # Shuffle order of sentence presentation
        self.availableSentenceInds = list(range(len(self.lists[0])))
        random.shuffle(self.availableSentenceInds)
Example #16
def loadAudio(filename):
    """
    loadAudio: loads audio data from file using pysndfile

    Note that, by default pysndfile converts the samples into floating point
    numbers and rescales them in the range [-1, 1]. This can be avoided by
    specifying the dtype argument in sndio.read(). However, when I imported
    the data in lab 1 and 2, computed features and trained the HMM models,
    I used the default behaviour in sndio.read() and rescaled the samples
    in the int16 range instead. In order to compute features that are
    compatible with the models, we have to follow the same procedure again.
    This will be simplified in future years.
    """
    sndobj = sndio.read(filename)
    samplingrate = sndobj[1]
    samples = np.array(sndobj[0]) * np.iinfo(np.int16).max
    return samples, samplingrate
Example #17
def calc_speech_rms(files, silences, rmsDir, fs=44100, plot=False):
    '''
    Calculate the overall RMS of the speech files, excluding marked silences
    '''
    f = files
    sumsqrd = 0.0
    n = 0
    for wavfile, sil in zip(f, silences):
        y, fs, _ = sndio.read(wavfile)
        t = np.arange(y.size)
        sTemp = np.zeros(t.size, dtype=bool)
        print("Started")
        for ind, s in enumerate(sil):
            print("Check {}".format(ind))
            sTemp = np.logical_or(sTemp, np.logical_and(t > s[0], t < s[1]))
        print("Done")
        y_temp = y[~sTemp]
        sumsqrd += np.sum(y_temp**2)
        n += y_temp.size
    rms = np.sqrt(sumsqrd / n)
    np.save(os.path.join(rmsDir, 'overall_da_rms.npy'), rms)
    return rms
Example #18
def flattenRMS(AudioFile, AnnotationFile):
    with open(AnnotationFile, 'r') as f:
        csvData = pd.read_csv(f)
    data, fs, encStr, fmtStr = sndio.read(AudioFile, return_format=True)
    csvData['start'] *= fs
    csvData['start'] = csvData['start'].astype(int)
    csvData['stop'] *= fs
    csvData['stop'] = csvData['stop'].astype(int)

    zerox = np.where(np.diff(np.sign(data)))[0]
    # get silent sections
    silences = csvData.loc[csvData['name'] == '#']
    audio = csvData.loc[csvData['name'] != '#']

    # Find nearest zero-crossing to start and stop times of silences
    nearestZerox = zerox[np.abs(zerox - csvData['start'].values[:, np.newaxis]).argmin(axis=1)]
    csvData['start'] = nearestZerox
    nearestZerox = zerox[np.abs(zerox - csvData['stop'].values[:, np.newaxis]).argmin(axis=1)]
    csvData['stop'] = nearestZerox

    csvData['rms'] = np.nan
    for ind, chunk in csvData.iterrows():
        if not chunk['name'] == '#':
            rms = np.sqrt(np.mean(data[chunk['start']:chunk['stop']]**2))
            csvData.iloc[ind, csvData.columns.get_loc('rms')] = rms
    avgRMS = csvData['rms'][csvData['rms'].notnull()].mean()

    silentData = np.zeros(int(0.3*fs))
    out = np.array([])
    for ind, chunk in csvData.iterrows():
        if chunk['name'] == '#':
            out = np.append(out, silentData)
        else:
            rmsCorFactor = avgRMS / chunk['rms']

            out = np.append(out, data[chunk['start']:chunk['stop']])  # * rmsCorFactor
            print(np.sqrt(np.mean((data[chunk['start']:chunk['stop']]*rmsCorFactor)**2)))

    sndio.write('./out.wav', out, rate=fs, format=fmtStr, enc=encStr)
Example #19
File: gen_da.py  Project: Pezz89/BPLabs
def gen_da_stim(n, outpath):
    da_file = './BioMAP_da-40ms.wav'
    da_stim, fs, enc, fmt = sndio.read(da_file, return_format=True)
    prestim_size = 0.0158
    # Repetition rate in Hz
    repetition_rate = 10.9
    full_stim_size = 1. / repetition_rate
    da_size = da_stim.size / fs
    prestim = np.zeros(int(fs * prestim_size))
    poststim = np.zeros(int(fs * ((full_stim_size - prestim_size) - da_size)))
    y_part = np.concatenate([prestim, da_stim, poststim])
    y_part_inv = -y_part
    loc_part = np.zeros(y_part.size)
    loc_part[prestim.size + 1] = 1

    y_2part = np.concatenate([y_part, y_part_inv])
    loc = np.concatenate([loc_part, loc_part])
    y_r = np.tile(y_2part, n)
    loc = np.tile(loc, n)
    loc = np.insert(loc, 0, np.zeros(fs))
    loc = np.where(loc)[0]

    y_r = np.insert(y_r, 0, np.zeros(fs))
    y_r = resampy.resample(y_r, fs, 44100)
    rat = 44100 / fs
    fs = 44100
    y_l = np.zeros(y_r.size)
    loc = loc * rat
    loc = loc.round().astype(int)
    np.save('./stimulus/3000_da_locs.npy', loc)

    idx = np.arange(y_l.size)
    trigger = gen_trigger(idx, 2., 0.01, fs)

    y = np.vstack((y_l, y_r, trigger)).T
    sndio.write(outpath, y, rate=44100, format=fmt, enc=enc)
    return outpath
Example #20
def getMUSDB_augmented(database_path):

    subsets = list()

    rate = None
    for subset in ['train', 'test']:
        samples = list()
        for root, _, files in os.walk(os.path.join(database_path, subset)):
            if "voice.wav" in files:
                bass_audio = drums_audio = vocal_audio = mix_audio = other_audio = None
                for file in files:
                    if file == "bass.wav":
                        bass_path = os.path.join(root, file)
                        bass_audio, sr, _ = sndio.read(bass_path)
                    elif file == "drums.wav":
                        drums_path = os.path.join(root, file)
                        drums_audio, sr, _ = sndio.read(drums_path)
                    elif file == "rest.wav":
                        other_path = os.path.join(root, file)
                        other_audio, sr, _ = sndio.read(other_path)
                    elif file == "mix.wav":
                        mix_path = os.path.join(root, file)
                        mix_audio, sr, _ = sndio.read(mix_path)
                    elif file == "voice.wav":
                        vocal_path = os.path.join(root, file)
                        vocal_audio, sr, _ = sndio.read(vocal_path)

                        if rate is None:
                            rate = sr
                        else:
                            if rate != sr:
                                raise RuntimeError(
                                    "getMUSDB_augmented::error::inconsistent sample rate in {} - {} != {}"
                                    .format(root, rate, sr))
                # Add other instruments to form accompaniment
                acc_audio = drums_audio + bass_audio + other_audio
                # NOTE: local_path (the output directory) is not defined in this snippet
                acc_path = os.path.join(local_path, os.path.basename(root),
                                        "accompaniment.wav")
                acc = write_wav_skip_existing(acc_path, acc_audio, rate)

                # Create mixture
                if mix_audio is None:
                    mix_path = os.path.join(local_path, os.path.basename(root),
                                            "mix.wav")
                    mix_audio = acc_audio + vocal_audio
                    mix = write_wav_skip_existing(mix_path, mix_audio, rate)
                else:
                    mix = Sample.from_array(mix_path, mix_audio, rate)

                diff_signal = np.abs(mix_audio - bass_audio - drums_audio -
                                     other_audio - vocal_audio)
                print(
                    "Maximum absolute deviation from source additivity constraint: "
                    + str(np.max(diff_signal)))  # Check if acc+vocals=mix
                print(
                    "Mean absolute deviation from source additivity constraint:    "
                    + str(np.mean(diff_signal)))

                # Wrap the remaining sources the same way the mixture is
                # wrapped above so they can be collected below
                bass = Sample.from_array(bass_path, bass_audio, rate)
                drums = Sample.from_array(drums_path, drums_audio, rate)
                other = Sample.from_array(other_path, other_audio, rate)
                vocal = Sample.from_array(vocal_path, vocal_audio, rate)

                # Collect all sources for now. Later on for
                # SVS: [mix, acc, vocal]
                # Multi-instrument: [mix, bass, drums, other, vocals]
                samples.append((mix, acc, bass, drums, other, vocal))

        subsets.append(samples)

    return subsets
Example #21
    def loadStimulus(self):
        '''
        Generate SNR-mixed stimulus wav files and load the associated marker
        and question files
        '''
        self.participant.load('mat_test')
        try:
            srt_50 = self.participant.data['mat_test']['srt_50']
            s_50 = self.participant.data['mat_test']['s_50']
        except KeyError:
            raise KeyError(
                "Behavioural matrix test results not available, make "
                "sure the behavioural test has been run before "
                "running this test.")
        save_dir = self.participant.data_paths['eeg_test/stimulus']
        '''
        # Estimate speech intelligibility thresholds using predicted
        # psychometric function
        s_50 *= 0.01
        x = logit(self.si * 0.01)
        snrs = (x/(4*s_50))+srt_50
        snrs = np.append(snrs, np.inf)
        snr_map = pd.DataFrame({"speech_intel" : np.append(self.si, 0.0), "snr": snrs})
        snr_map_path = os.path.join(save_dir, "snr_map.csv")
        snr_map.to_csv(snr_map_path)
        snrs = np.repeat(snrs[np.newaxis], 4, axis=0)
        snrs = roll_independant(snrs, np.array([0,-1,-2,-3]))
        stim_dirs = [x for x in os.listdir(self.listDir) if os.path.isdir(os.path.join(self.listDir, x))]
        shuffle(stim_dirs)
        '''
        snrs = self.participant.data['parameters']['decoder_test_SNRs'] + srt_50
        stim_dirs = [
            x for x in os.listdir(self.listDir)
            if os.path.isdir(os.path.join(self.listDir, x))
        ]

        ordered_stim_dirs = []
        for ind in self.participant.parameters['decoder_test_lists']:
            for folder in stim_dirs:
                if re.match(f'Stim_{int(ind)}$', folder):
                    ordered_stim_dirs.append(folder)

        # ordered_stim_dirs *= int(len(snrs))
        noise_file = PySndfile(self.noise_path, 'r')
        wav_files = []
        wav_metas = []
        question = []
        marker_files = []
        self.socketio.emit('test_stim_load', namespace='/main')
        for ind, dir_name in enumerate(ordered_stim_dirs[:snrs.shape[1]]):
            logger.debug(
                f"Processing list directory {ind+1} of {snrs.shape[1]}")
            stim_dir = os.path.join(self.listDir, dir_name)
            wav = globDir(stim_dir, "*.wav")[0]
            csv_files = natsorted(globDir(stim_dir, "*.csv"))
            marker_file = csv_files[0]
            question_files = csv_files[1:]
            # rms_file = globDir(stim_dir, "*.npy")[0]
            # speech_rms = float(np.load(rms_file))
            snr = snrs[:, ind]
            audio, fs, enc, fmt = sndio.read(wav, return_format=True)

            speech = audio[:, :2]
            triggers = audio[:, 2]
            #speech_rms, _, _ = asl_P56(speech, fs, 16.)
            speech_rms = rms_no_silences(speech, fs, -30.)

            wf = []
            wm = []
            for ind2, s in enumerate(snr):
                start = randint(0, noise_file.frames() - speech.shape[0])
                noise_file.seek(start)
                noise = noise_file.read_frames(speech.shape[0])
                noise_rms = np.sqrt(np.mean(noise**2))
                # noise_rms = asl_P56(noise, fs, 16)
                snr_fs = 10**(-s / 20)
                if snr_fs == np.inf:
                    snr_fs = 0.
                elif snr_fs == -np.inf:
                    raise ValueError(
                        "Noise infinitely louder than signal at snr: {}".
                        format(s))
                noise = noise * (speech_rms / noise_rms)
                out_wav_path = os.path.join(
                    save_dir, "Stim_{0}_{1}.wav".format(ind, ind2))
                out_meta_path = os.path.join(
                    save_dir, "Stim_{0}_{1}.npy".format(ind, ind2))
                with np.errstate(divide='raise'):
                    out_wav = (speech + (np.stack([noise, noise], axis=1) *
                                         snr_fs)) * self.reduction_coef
                out_wav = np.concatenate([out_wav, triggers[:, np.newaxis]],
                                         axis=1)
                sndio.write(out_wav_path, out_wav, fs, fmt, enc)
                np.save(out_meta_path, s)
                wf.append(out_wav_path)
                wm.append(out_meta_path)
            wav_metas.append(wm)
            wav_files.append(wf)
            out_marker_path = os.path.join(save_dir,
                                           "Marker_{0}.csv".format(ind))
            marker_files.append(out_marker_path)
            copyfile(marker_file, out_marker_path)
            for q_ind, q_file in enumerate(question_files):
                out_q_path = os.path.join(
                    save_dir, "Questions_{0}_{1}.csv".format(ind, q_ind))
                self.question_files.append(out_q_path)
                copyfile(q_file, out_q_path)

            for q_file_path in question_files:
                q = []
                with open(q_file_path, 'r') as q_file:
                    q_reader = csv.reader(q_file)
                    for line in q_reader:
                        q.append(line)
                question.append(q)

        self.wav_files = [item for sublist in wav_files for item in sublist]
        self.wav_metas = [item for sublist in wav_metas for item in sublist]

        self.question.extend(question)

        for item in marker_files:
            self.marker_files.extend([item] * 4)

        self.answers = np.empty(np.shape(self.question)[:2])
        self.answers[:] = np.nan
Example #22
    # the total number of changes is nrows
    n = nrows
    cols = np.random.geometric(0.5, n)
    cols[cols >= ncols] = 0
    rows = np.random.randint(nrows, size=n)
    array[rows, cols] = np.random.random(n)

    df = pd.DataFrame(array)
    df.fillna(method='ffill', axis=0, inplace=True)
    total = df.sum(axis=1)

    return total.values


da_x, da_fs, da_enc = sndio.read('./stimulus/wav/10min_da.wav')
sp_x, sp_fs, sp_enc = sndio.read('./noise_source/male_speech_resamp.wav')

pink_n = voss(sp_x.size, 1)
da_rms = np.sqrt(np.mean(da_x**2))
sp_rms = np.sqrt(np.mean(sp_x**2))
pink_n_rms = np.sqrt(np.mean(pink_n**2))
da_x *= sp_rms / da_rms
pink_n *= sp_rms / pink_n_rms
f, Pxx_den = signal.welch(pink_n, sp_fs, nperseg=1024)
plt.semilogy(f, Pxx_den)
f, Pxx_den = signal.welch(sp_x, sp_fs, nperseg=1024)
plt.semilogy(f, Pxx_den)
f, Pxx_den = signal.welch(da_x[:, 1], da_fs, nperseg=1024)
plt.semilogy(f, Pxx_den)
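The example above begins mid-function: its first block is the tail of a voss() pink-noise generator. For context, the complete Voss-McCartney generator this tail conventionally belongs to (a reconstruction; only the lines after the comment are taken verbatim from the snippet):

import numpy as np
import pandas as pd

def voss(nrows, ncols=16):
    # Voss-McCartney: sum several random sources, each updated half as
    # often as the previous; the sum has an approximately 1/f spectrum.
    array = np.full((nrows, ncols), np.nan)
    array[0, :] = np.random.random(ncols)

    # the total number of changes is nrows
    n = nrows
    cols = np.random.geometric(0.5, n)
    cols[cols >= ncols] = 0
    rows = np.random.randint(nrows, size=n)
    array[rows, cols] = np.random.random(n)

    df = pd.DataFrame(array)
    df.fillna(method='ffill', axis=0, inplace=True)
    total = df.sum(axis=1)

    return total.values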
Example #23
File: rms_test.py  Project: Pezz89/BPLabs
def main():
    x, fs, enc = sndio.read(
        './matrix_test/behavioural_stim/stimulus/wav/sentence-lists/ukmatrix10.1/Trial_00001.wav'
    )
    rms = rms_no_silences(x, fs, -30)
    print(f"RMS: {rms}")
Example #24
    def testLoop(self):
        '''
        Main loop for iteratively finding the SRT
        '''
        self.waitForPageLoad()

        self.displayInstructions()
        self.waitForPartReady()

        while (not self.finishTest and not self._stopevent.isSet()
               and len(self.availableSentenceInds) and len(self.trackOrder)):
            # Plot SNR of current trial to the clinician screen
            plt.clf()
            for at in self.adaptiveTracks:
                at.plotSNR()
            self.renderSNRPlot()
            # Get the index of the sentence to be played for the current trial
            currentSentenceInd = self.availableSentenceInds.pop(0)
            # Get the index of the current adaptive track to use
            self.adTrInd = self.trackOrder.pop(0)
            # Generate trial audio
            self.y = self.adaptiveTracks[self.adTrInd].generateTrial(
                self.lists[0][currentSentenceInd],
                self.listsRMS[0][currentSentenceInd]
            )
            if self.participant.parameters['hl_sim_active']:
                self.y = apply_hearing_loss_sim(self.y, self.fs, channels=[0])
            # Define words presented in the current trial
            self.currentWords = self.listsString[0][currentSentenceInd]

            logger.info("-"*78)
            logger.info("{0:<25}".format("Current trial:") + f"{' '.join(self.currentWords)}")
            logger.info("{0:<25}".format("Current track index:") + f"{self.adTrInd}")
            logger.info("{0:<25}".format("Current trial number:") + f"{self.trialN}")
            logger.info("{0:<25}".format("Current SNR:") + f"{self.adaptiveTracks[self.adTrInd].snr}")
            if self.audio_cal:
                y, fs, fmt = sndio.read('./calibration/out/stimulus/mat_cal_stim.wav')
                self.playStimulus(y, fs)
            else:
                self.playStimulus(self.y, self.fs)
            self.waitForResponse()
            self.checkSentencesAvailable()
            if self.finishTest:
                break
            if self._stopevent.isSet():
                return
            logger.info("{0:<25}".format("N correct responses:") + f"{int(self.nCorrect*5)}")
            self.adaptiveTracks[self.adTrInd].calcSNR(self.nCorrect)
            self.checkSentencesAvailable()
            self.saveState(out=self.backupFilepath)
            self.trialN += 1
            self.adaptiveTracks[self.adTrInd].incrementTrialN()
        self.saveState(out=self.backupFilepath)
        logger.info("-"*78)
        if not self._stopevent.isSet():
            self.unsetPageLoaded()
            logger.info("Behavioural test complete")
            self.socketio.emit('processing-complete', {'data': ''}, namespace='/main')
            self.waitForPageLoad()
            # Plot SNR of current trial to the clinician screen
            plt.clf()
            for at in self.adaptiveTracks:
                at.plotSNR()
            self.renderSNRPlot()
            self.fitLogistic()
            self.waitForFinalise()
Example #25
    else:
        continue
    raise NameError('TIDIGITS root directory not found on system')

genders = ["man", "woman"]
speakers = ["ae", "ac"]

digits = ["o", "z", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
repetitions = ["a", "b"]

tidigits = []
for idx in range(len(speakers)):
    for digit in digits:
        for repetition in repetitions:
            filename = os.path.join(tidigitsroot, genders[idx], speakers[idx], digit+repetition+'.wav')
            sndobj = sndio.read(filename)
            # libsndfile scales the values down to the -1.0 +1.0 range
            # here we convert back to the range of 16 bit linear PCM
            # to get similar results as from Kaldi or HTK
            samples = np.array(sndobj[0])*np.iinfo(np.int16).max
            samplingrate = sndobj[1]
            tidigits.append({"filename": filename,
                             "samplingrate": samplingrate,
                             "gender": genders[idx],
                             "speaker": speakers[idx],
                             "digit": digit,
                             "repetition": repetition,
                             "samples": samples})

if sys.version_info.major==3:
    np.savez('tidigits_python3.npz', tidigits=tidigits)
Example #26
def concatenateStimuli(MatrixDir, OutDir, Length, n):
    # Get matrix wav file paths
    wavFiles = globDir(MatrixDir, '*.wav')

    stim_parts = os.path.join(MatrixDir, "stim_parts.csv")
    stim_words = os.path.join(MatrixDir, "stim_words.csv")
    stim_part_rows = []
    with open(stim_parts, 'r') as csvfile:
        stim_part_rows = [line for line in csv.reader(csvfile)]
    with open(stim_words, 'r') as csvfile:
        stim_word_rows = [line for line in csv.reader(csvfile)]

    wavFiles = natsorted(wavFiles)
    totalSize = 0
    y = []
    parts = []
    questions = []
    i = 0
    gapSize = np.random.uniform(0.8, 1.2, len(wavFiles))
    for wav, gap in zip(wavFiles, gapSize):
        if i == n:
            break
        wavObj = PySndfile(wav)
        fs = wavObj.samplerate()
        size = wavObj.frames()
        totalSize += size
        totalSize += int(gap * fs)
        if (totalSize / fs) > Length:
            # total size + 2 second silence at start
            y.append(np.zeros((totalSize + 2 * fs, 3)))
            parts.append([])
            questions.append([])
            i += 1
            totalSize = 0

    writePtr = 2 * fs
    idx = np.arange(0, writePtr)
    chunk = np.zeros(idx.size)
    chunk = np.vstack([chunk, chunk, chunk]).T
    trigger = gen_trigger(idx, 2., 0.01, fs)
    chunk[:, 2] = trigger
    for i, _ in enumerate(y):
        y[i][0:writePtr, :] = chunk

    i = 0
    for wav, word, part in zip(wavFiles, stim_word_rows, stim_part_rows):
        if writePtr >= y[i].shape[0]:
            i += 1
            writePtr = fs * 2
        if i == n:
            break
        x, fs, encStr, fmtStr = sndio.read(wav, return_format=True)
        pad = int(0.1 * fs)  # 100 ms of silence after each sentence
        silence = np.zeros(pad)
        chunk = np.append(x, silence)

        idx = np.arange(writePtr, writePtr + chunk.shape[0])
        chunk = np.vstack([chunk, chunk, np.zeros(chunk.shape[0])]).T
        trigger = gen_trigger(idx, 2., 0.01, fs)
        chunk[:, 2] = trigger

        y[i][writePtr:writePtr + chunk.shape[0], :] = chunk
        questions[i].append(word)
        parts[i].append(part)

        writePtr += chunk.shape[0]

    for ind, (data, q, p) in enumerate(zip(y, questions, parts)):
        pysndfile.sndio.write(os.path.join(OutDir, 'stim_{}.wav'.format(ind)),
                              data,
                              rate=fs,
                              format=fmtStr,
                              enc=encStr)
        with open('./out/stim/stim_words_{}.csv'.format(ind), 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerows(q)
        with open('./out/stim/stim_parts_{}.csv'.format(ind), 'w') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerows(p)
Example #27
def serialize(train=True):
    """
    Serialize the TIMIT dataset to TFRecords
    :param train:
    :return:
    """

    base_data_path = FLAGS.input_train_dir if train else FLAGS.input_test_dir
    output_path = os.path.join(FLAGS.data_dir,
                               _FILENAME_TRAIN if train else _FILENAME_TEST)
    num_frames = TRN_NUM_FRAMES // FrameSize if train else TST_NUM_FRAMES // FrameSize

    timit = []
    print('Parsing .wav files...')
    for region in REGIONS:
        # iterate over all speakers for that region
        region_path = os.path.join(base_data_path, region)
        for speaker_id in os.listdir(region_path):
            speaker_path = os.path.join(region_path, speaker_id)

            # iterate over all utterances for that speaker
            speaker_wavs = glob.glob(speaker_path + '/*.wav')
            for wav in speaker_wavs:
                if "sa" not in wav:
                    # get the sound frequencies and sampling rate
                    sndobj = sndio.read(wav)
                    samplingrate = sndobj[1]
                    samples = np.array(sndobj[0]) * np.iinfo(np.int16).max

                    # parse the phoneme file
                    phonemes = _get_phonemes(wav.replace('.wav', '.phn'))

                    # get sentence
                    words = _get_words(wav.replace('.wav', '.wrd'))

                    timit.append({
                        'filename': wav,
                        'samplingrate': samplingrate,
                        'phonemes': phonemes,
                        'words': words,
                        'gender': speaker_id[0],
                        'speaker': speaker_id,
                        'samples': samples
                    })

    frame_ctn = 0

    frames = np.ndarray(shape=(num_frames, NUM_FILTERS, 1, Total_FEATURES))
    labels = np.ndarray(shape=(num_frames))

    # transform the samples into MSFC features
    print('Parsing frames from utterances...')

    # adding the counter for fix-frames input
    count = 0
    input_sample = np.ndarray(shape=(NUM_FILTERS, 1, Total_FEATURES),
                              dtype=np.float32)
    label_list = []

    for utt in timit:
        samples = utt['samples']
        phonemes = utt['phonemes']

        # extract each phoneme mfsc, delta and delta-delta
        for pho in phonemes:
            # extract the frames for this phonemes only
            pho_idx = class2pho[pho['phoneme']]['idx']
            pho_samples = samples[pho['start']:pho['end']]

            # get the filterbanks
            mfscs = ft.mfsc(pho_samples,
                            samplerate=utt['samplingrate'],
                            nfilt=NUM_FILTERS)

            # for each frame
            for mfsc in mfscs:
                # add the deltas and delta-deltas for each static frame
                delta = _get_delta(mfsc)
                delta2 = _get_delta(delta)

                # create the new frame representation
                frame = np.ndarray(shape=(NUM_FILTERS, 1, NUM_FEATURES),
                                   dtype=np.float32)
                frame[:, :, 0] = mfsc[:, None]
                frame[:, :, 1] = delta[:, None]
                frame[:, :, 2] = delta2[:, None]

                input_sample[:, :, 3 * count:3 * (count + 1)] = frame
                label_list.append(pho_idx)
                count += 1
                if count == 9:
                    count = 0
                    frames[frame_ctn, :, :, :] = input_sample
                    #print(label_list)
                    #print(Counter(label_list).most_common()[0][0])
                    labels[frame_ctn] = Counter(label_list).most_common()[0][0]
                    #print(label_list[4])
                    #labels[frame_ctn] = label_list[4]
                    frame_ctn += 1
                    #print('Finish ', frame_ctn)
                    input_sample = np.ndarray(shape=(NUM_FILTERS, 1,
                                                     Total_FEATURES),
                                              dtype=np.float32)
                    label_list.clear()

                    if frame_ctn % 1000 == 0:
                        print('- {0} frames processed...'.format(frame_ctn))

    frames = frames[0:frame_ctn, :, :, :]
    labels = labels[0:frame_ctn]

    print('Finished processing {0} frames!'.format(frame_ctn))
    means = frames.mean(axis=0)
    std = frames.std(axis=0)

    # normalize zero mean and unity variance
    frames = frames - means
    frames = frames / std

    # shuffle the frame
    frames_shuf = np.ndarray(shape=(frame_ctn, NUM_FILTERS, 1, Total_FEATURES))
    labels_shuf = np.ndarray(shape=(frame_ctn))
    index_shuf = list(range(len(frames)))
    shuffle(index_shuf)
    index = 0
    for i in index_shuf:
        frames_shuf[index, :, :, :] = frames[i]
        labels_shuf[index] = labels[i]
        index += 1
    print('Finish shuffle.............................')

    filename = output_path
    num_of_train = math.ceil(frame_ctn * 0.75)
    print('Total number of train file: ', num_of_train)
    print('Writing', filename)
    writer = tf.python_io.TFRecordWriter(filename)

    for i in range(frames_shuf.shape[0]):
        frame = np.ndarray(shape=(1, NUM_FILTERS, 1, Total_FEATURES),
                           dtype=np.float32)
        label = labels_shuf[i]
        frame[0, :, :, :] = frames_shuf[i]

        _convert_to_record(frame, label, writer)
        if i % 1000 == 0:
            print('- Wrote {0}/{1} frames...'.format(i, frames_shuf.shape[0]))

        if i == (num_of_train - 1):
            writer.close()
            filename = os.path.join(FLAGS.data_dir, _FILENAME_VAL)
            print('Writing', filename, '\t from frames', i)
            writer = tf.python_io.TFRecordWriter(filename)

    print('Finish Writing ', i)
    writer.close()

    # save the phoneme mapping file
    with open(
            os.path.join(FLAGS.data_dir,
                         'phon_tr.json' if train else 'phon_tst.json'),
            'w') as f:
        json.dump(class2pho, f, indent=4, sort_keys=True)
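_convert_to_record is not included in the snippet. A plausible implementation for the TF1 tf.python_io writer used above (the feature keys are assumptions):

import tensorflow as tf

def _convert_to_record(frame, label, writer):
    # Serialise one frame/label pair as a tf.train.Example record.
    example = tf.train.Example(features=tf.train.Features(feature={
        'frame': tf.train.Feature(
            float_list=tf.train.FloatList(value=frame.flatten())),
        'label': tf.train.Feature(
            int64_list=tf.train.Int64List(value=[int(label)])),
    }))
    writer.write(example.SerializeToString())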
Example #28
        continue
    raise NameError('TIDIGITS root directory not found on system')

genders = ["man", "woman"]
speakers = ["bm", "ew"]
#speakers = ["ae", "ac"]

digits = ["o", "z", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
repetitions = ["a", "b"]

data = []
for idx in range(len(speakers)):
    for digit in digits:
        for repetition in repetitions:
            filename = os.path.join(tidigitsroot, genders[idx], speakers[idx], digit+repetition+'.wav')
            sndobj = sndio.read(filename, dtype=np.int16)
            # the following is not necessary any longer, but I need to check that the feature extraction
            # still works with int16 numbers. Also I need to change all occurrences in lab 2 and 3!!!!
            # libsndfile scales the values down to the -1.0 +1.0 range
            # here we convert back to the range of 16 bit linear PCM
            # to get similar results as from Kaldi or HTK
            #samples = np.array(sndobj[0])*np.iinfo(np.int16).max
            samplingrate = sndobj[1]
            data.append({"filename": filename,
                         "samplingrate": samplingrate,
                         "gender": genders[idx],
                         "speaker": speakers[idx],
                         "digit": digit,
                         "repetition": repetition,
                         "samples": sndobj[0]})
Example #29
def main():
    stim_dir = "../behavioural_stim/stimulus"
    wav_dir = "../behavioural_stim/stimulus/wav"
    base_dir = "../behavioural_stim/stimulus/wav/sentence-lists/"
    noise_dir = "../behavioural_stim/stimulus/wav/noise/"
    out_dir = "./out"
    dir_must_exist(base_dir)
    dir_must_exist(out_dir)
    dir_must_exist(wav_dir)
    dir_must_exist(noise_dir)

    noise_filepath = "../behavioural_stim/stimulus/wav/noise/noise_norm.wav"

    folders = os.listdir(base_dir)
    folders = natsorted(folders)[1:15]
    folders = list(zip(folders[::2], folders[1::2]))
    calc_potential_max(base_dir, noise_filepath, out_dir)
    n_questions = 4
    fs = 44100

    for ind, (list_folder_1, list_folder_2) in enumerate(folders):
        out_folder_name = 'Stim_{}'.format(ind)
        out_folder = os.path.join(out_dir, out_folder_name)
        delete_if_exists(out_folder)
        dir_must_exist(out_folder)
        out_wav_path = os.path.join(out_folder, "stim.wav")
        out_csv_path = os.path.join(out_folder, "markers.csv")
        out_rms_path = os.path.join(out_folder, "rms.npy")
        out_q_path = [
            os.path.join(out_folder, "questions_{}.csv".format(x))
            for x in range(n_questions)
        ]
        out_wav = PySndfile(out_wav_path, 'w',
                            construct_format('wav', 'pcm16'), 3, 44100)
        list_1_wav = globDir(os.path.join(base_dir, list_folder_1), '*.wav')
        list_2_wav = globDir(os.path.join(base_dir, list_folder_2), '*.wav')
        list_1_csv = globDir(os.path.join(base_dir, list_folder_1), '*.csv')
        list_2_csv = globDir(os.path.join(base_dir, list_folder_2), '*.csv')
        merged_wavs = list_1_wav + list_2_wav
        merged_csvs = list_1_csv + list_2_csv
        words = []
        for c in merged_csvs:
            with open(c, 'r') as csvfile:
                for line in csv.reader(csvfile):
                    words.append(line)
        c = list(zip(merged_wavs, words))
        shuffle(c)
        merged_wavs, words = zip(*c)
        sum_sqrd = 0.
        n = 0
        with open(out_csv_path, 'w') as csvfile, ExitStack() as stack:
            # Open all question files
            qfiles = [
                stack.enter_context(open(qfile, 'w')) for qfile in out_q_path
            ]
            writer = csv.writer(csvfile)
            qwriters = [csv.writer(qfile) for qfile in qfiles]

            counter = 0
            stim_count = len(merged_wavs)
            stim_count_half = stim_count // 2
            q_inds = np.array([
                sample(range(0, stim_count_half), n_questions),
                sample(range(stim_count_half, stim_count - 1), n_questions)
            ]).T
            a = 0
            silence = np.zeros((88200, 3))
            idx = np.arange(0, silence.shape[0])
            trigger = gen_trigger(idx, 2., 0.01, fs)
            silence[:, 2] = trigger
            out_wav.write_frames(silence)
            for ind, (wav, txt) in enumerate(zip(merged_wavs, words)):
                csv_line = [counter]
                silence = np.zeros((int(
                    np.random.uniform(int(0.3 * 44100), int(0.4 * 44100),
                                      1)), 3))
                idx = np.arange(counter, counter + silence.shape[0])
                trigger = gen_trigger(idx, 2., 0.01, fs)
                silence[:, 2] = trigger
                out_wav.write_frames(silence)
                counter += silence.shape[0]
                csv_line.append(counter)
                csv_line.append("#")
                writer.writerow(csv_line)
                csv_line = [counter]
                x, fs, enc = sndio.read(wav)
                sum_sqrd += np.sum(x**2)
                n += x.size

                y = np.vstack([x, x, np.zeros(x.size)]).T
                idx = np.arange(counter, counter + y.shape[0])
                trigger = gen_trigger(idx, 2., 0.01, fs)
                y[:, 2] = trigger
                out_wav.write_frames(y)
                counter += y.shape[0]
                csv_line.append(counter)
                csv_line.append(" ".join(txt))
                writer.writerow(csv_line)
                if ind in q_inds:
                    writer_ind = int(np.where(ind == q_inds)[0])
                    blank_ind = randint(0, len(txt) - 1)
                    q_list = copy(txt)
                    q_list[blank_ind] = '_'
                    qwriters[writer_ind].writerow(
                        [" ".join(q_list), txt[blank_ind]])
                    a += 1
            if a != 2 * n_questions:
                raise ValueError("Expected {} questions, wrote {}".format(
                    2 * n_questions, a))

            csv_line = [counter]
            silence = np.zeros(
                (int(np.random.uniform(int(0.3 * 44100), int(0.4 * 44100),
                                       1)), 3))
            idx = np.arange(counter, counter + silence.shape[0])
            trigger = gen_trigger(idx, 2., 0.01, fs)
            silence[:, 2] = trigger
            out_wav.write_frames(silence)
            counter += silence.shape[0]
            csv_line.append(counter)
            csv_line.append("#")
            writer.writerow(csv_line)
            rms = np.sqrt(sum_sqrd / n)
            np.save(out_rms_path, rms)

            x, fs, enc = sndio.read(out_wav_path)