Exemplo n.º 1
0
def fit2(kern, audio, file_name, max_par, fs):
    """Fit the parametric model ``func`` to a sampled kernel with ``curve_fit``.

    Parameters
    ----------
    kern : array
        Sampled kernel; it is flattened before fitting.
    audio : array
        Audio signal handed to ``gpitch.init_cparam`` to obtain the initial
        frequencies and variances.
    file_name : str
        File name handed to ``gpitch.find_ideal_f0`` to look up the ideal f0.
    max_par : int
        Forwarded to ``gpitch.init_cparam`` as ``maxh``.
    fs : number
        Sampling frequency; the time axis has a spacing of ``1 / fs``.

    Returns
    -------
    params : array
        Fitted parameter vector. It has the same layout as the initial guess:
        consecutive triples ``[variance_i, v_i, frequency_i]`` where ``v_i``
        starts at 0.1 (presumably a lengthscale -- TODO confirm against
        ``func``).
    kern_init : array
        ``func`` evaluated with the initial guess.
    kern_approx : array
        ``func`` evaluated with the fitted parameters.
    """
    # time axis: one point per kernel sample, flat (1-D)
    num_samples = kern.size
    time_axis = np.linspace(0., (num_samples - 1.) / fs, num_samples).reshape(-1, )

    # initial frequencies / variances
    ideal_f0 = gpitch.find_ideal_f0([file_name])[0]
    cparams = gpitch.init_cparam(y=audio, fs=fs, maxh=max_par, ideal_f0=ideal_f0, scaled=False)
    init_f, init_v = cparams[0:2]

    # one [variance, 0.1, frequency] triple per entry, flattened into p0
    triples = [[init_v[k], 0.1, init_f[k]] for k in range(init_v.size)]
    p0 = np.array(triples).reshape(-1, )

    # bounded least squares: every parameter is constrained to [0, 20000]
    upper_bounds = p0.size * [20000.]
    fit_result = scipy.optimize.curve_fit(func, time_axis, kern.reshape(-1,), p0,
                                          bounds=(0., upper_bounds))
    popt = fit_result[0]

    # model evaluated before and after the optimisation
    kern_init = func(time_axis, *p0)
    kern_approx = func(time_axis, *popt)

    return popt, kern_init, kern_approx
Exemplo n.º 2
0
def fit(kern, audio, file_name, max_par, fs):
    """Fit the approximate kernel to a sampled kernel.

    Parameters
    ----------
    kern : array
        Sampled kernel to approximate; passed as ``y`` to ``optimize_kern``.
    audio : array
        Audio signal handed to ``gpitch.init_cparam`` to obtain the initial
        frequencies and variances.
    file_name : str
        File name handed to ``gpitch.find_ideal_f0`` to look up the ideal f0.
    max_par : int
        Forwarded to ``gpitch.init_cparam`` as ``maxh``.
    fs : number
        Sampling frequency; the time axis has a spacing of ``1 / fs``.

    Returns
    -------
    params : list
        ``[lengthscale, variance, frequency]`` extracted from the optimised
        parameter vector.
    kern_init : array
        ``approximate_kernel`` evaluated with the initial parameters.
    kern_approx : array
        ``approximate_kernel`` evaluated with the optimised parameters.
    """

    # time vector for kernel (column vector)
    n = kern.size
    xkern = np.linspace(0., (n - 1.) / fs, n).reshape(-1, 1)

    # initialize parameters
    if0 = gpitch.find_ideal_f0([file_name])[0]
    init_f, init_v = gpitch.init_cparam(y=audio, fs=fs, maxh=max_par, ideal_f0=if0, scaled=False)[0:2]
    init_l = np.array([0., 1.])  # [noise variance, lengthscale]

    # optimization; parameter layout is
    # [noise variance, lengthscale, variances..., frequencies...]
    p0 = np.hstack((init_l, init_v, init_f))
    pstar = optimize_kern(x=xkern, y=kern, p0=p0)

    # compute initial and learned kernel
    kern_init = approximate_kernel(p0, xkern)
    kern_approx = approximate_kernel(pstar, xkern)

    # get kernel hyperparameters
    # Floor division keeps npartials an int so it is a valid slice index
    # under both Python 2 and Python 3.
    npartials = (pstar.size - 2) // 2
    # pstar[0] is the noise variance; it is not part of the returned params.
    lengthscale = pstar[1]
    variance = pstar[2: npartials + 2]
    frequency = pstar[npartials + 2:]
    params = [lengthscale, variance, frequency]
    return params, kern_init, kern_approx
Exemplo n.º 3
0
    def init_kernel(self, covsize=441, num_sam=10000, max_par=1, train=False, save=False, load=False):
        """Populate ``self.params`` and build ``self.kern_pitches``.

        ``self.params`` is reset to three lists, ``[lengthscales, variances,
        frequencies]``, with one entry per file in ``self.train_data``. They
        are filled in one of three ways:

        * ``train=True``: sample a covariance/kernel from each file and fit
          the kernel with ``gpitch.kernelfit.fit``; optionally call
          ``self.save()`` when ``save`` is true.
        * ``load=True`` (and ``train`` false): delegate to
          ``self.load_kernel()``.
        * otherwise: take frequencies/variances from ``gpitch.init_cparam``
          with a fixed lengthscale of 0.1, and store a max-normalised kernel
          computed from the FFT magnitude of the data.

        The first and last modes also set ``self.kern_sampled`` to
        ``[time axes, sampled kernels]``. Finally the pitch kernels are
        created with ``gpitch.init_kernels.init_kern_com``.

        Parameters
        ----------
        covsize : int
            Number of kernel samples kept per file.
        num_sam : int
            Forwarded to ``gpitch.samplecov.get_cov`` (train mode only).
        max_par : int
            Forwarded as ``max_par`` / ``maxh`` to the fitting helpers.
        train, save, load : bool
            Mode switches described above.
        """
        num_files = len(self.train_data)
        self.params = [[], [], []]
        skern = num_files * [np.zeros((1, 1))]
        xkern = num_files * [None]

        def time_axis(fs):
            # column vector of covsize time stamps spaced 1 / fs apart
            return np.linspace(0., (covsize - 1.) / fs, covsize).reshape(-1, 1)

        if train:
            self.sampled_cov = num_files * [None]

            for idx in range(num_files):
                data = self.train_data[idx]

                # sampled covariance matrix and kernel (third output unused)
                cov, kernel, _ = gpitch.samplecov.get_cov(data.y, num_sam=num_sam, size=covsize)
                self.sampled_cov[idx] = cov
                skern[idx] = kernel

                # fit the sampled kernel
                # NOTE(review): fs is hard-coded to 16000 here while the time
                # axis below uses data.fs -- confirm this is intended.
                fitted = gpitch.kernelfit.fit(kern=skern[idx], audio=data.y,
                                              file_name=data.name, max_par=max_par, fs=16000)[0]
                self.params[0].append(fitted[0])  # lengthscale
                self.params[1].append(fitted[1])  # variances
                self.params[2].append(fitted[2])  # frequencies

                xkern[idx] = time_axis(data.fs)

            self.kern_sampled = [xkern, skern]

            if save:
                self.save()

        elif load:
            # reuse parameters learned in a previous run
            self.load_kernel()

        else:
            # initialise from the spectrum of the data
            for idx in range(num_files):
                data = self.train_data[idx]
                ideal_f0 = gpitch.find_ideal_f0([data.name])[0]

                cparams = gpitch.init_cparam(y=data.y.copy(),
                                             fs=data.fs,
                                             maxh=max_par,
                                             ideal_f0=ideal_f0)

                self.params[0].append(np.array(0.1))  # lengthscale
                self.params[1].append(cparams[1])  # variances
                self.params[2].append(cparams[0])  # frequencies

                # inverse FFT of the magnitude spectrum, truncated to covsize
                magnitude = np.abs(fftpack.fft(data.y.copy().reshape(-1, )))
                kernel = fftpack.ifft(magnitude)[0:covsize].real
                kernel /= np.max(kernel)
                skern[idx] = kernel
                xkern[idx] = time_axis(data.fs)

            self.kern_sampled = [xkern, skern]

        # one component kernel per training file / pitch
        self.kern_pitches = gpitch.init_kernels.init_kern_com(num_pitches=len(self.train_data),
                                                              lengthscale=self.params[0],
                                                              energy=self.params[1],
                                                              frequency=self.params[2],
                                                              len_fixed=True)
Exemplo n.º 4
0
def get_kernel_features(filenames, ytrain, maxh, fs):
    """Compute component-kernel features for every training file.

    Parameters
    ----------
    filenames : sequence
        File names; used to look up the ideal f0 of each pitch.
    ytrain : sequence
        Training signals, indexed in step with ``filenames``.
    maxh : int
        Forwarded to ``gpitch.init_cparam``.
    fs : number
        Sampling frequency forwarded to ``gpitch.init_cparam``.

    Returns
    -------
    tuple
        ``(cparams, freq_feat, var_feat)``: the raw ``init_cparam`` outputs
        per file, plus copies of their first (frequency) and second
        (variance) entries.
    """
    num_pitches = len(filenames)
    ideal_f0 = gpitch.find_ideal_f0(filenames)  # ideal frequency for each pitch

    cparams = [
        gpitch.init_cparam(y=ytrain[k], fs=fs, maxh=maxh, ideal_f0=ideal_f0[k], scaled=False)
        for k in range(num_pitches)
    ]

    # copies, so callers can modify the features without touching cparams
    freq_feat = [entry[0].copy() for entry in cparams]
    var_feat = [entry[1].copy() for entry in cparams]
    return cparams, freq_feat, var_feat
Exemplo n.º 5
0
def train_notebook(gpu='0',
                   list_limits=None,
                   maxiter=(1000, 10000),
                   nivps=(200, 200),
                   frames=8000,
                   save=True):
    """Train one model per training file, plot the results and save them.

    Parameters
    ----------
    gpu : str
        Passed to ``gpitch.init_settings``.
    list_limits : sequence of two ints, optional
        ``[start, stop]`` slice applied to ``gpitch.lfiles_training``.
        ``None`` (the default) uses every file.
    maxiter : sequence of two ints
        Iteration budgets for the first optimisation pass (``za`` fixed) and
        the second pass (``za`` released).
    nivps : sequence of two ints
        Number of inducing variables per second for activation and component.
    frames : int
        Number of audio frames read from each file for training.
    save : bool
        When true, attach the predictions to each model and pickle it.

    Returns
    -------
    list
        The trained models, one per file.
    """

    # NOTE(review): the return value is not used below; the reference is kept
    # in case dropping it would tear the session down -- confirm.
    sess = gpitch.init_settings(gpu)  # choose gpu to work

    ## import audio files for initializing component parameters
    datadir = '/import/c4dm-04/alvarado/datasets/ss_amt/training_data/'
    lfiles = gpitch.lfiles_training
    if list_limits is not None:
        # The previous code indexed list_limits unconditionally and crashed
        # with the default of None.
        lfiles = lfiles[list_limits[0]:list_limits[1]]
    numf = len(lfiles)  # number of files loaded
    # NOTE(review): result unused in this function; call kept in case it
    # validates the file names -- confirm and remove if it is side-effect free.
    if0 = gpitch.find_ideal_f0(lfiles)  # ideal frequency for each pitch
    x2, y2, fs2 = [], [], []
    for i in range(numf):
        a, b, c = gpitch.readaudio(datadir + lfiles[i],
                                   frames=32000,
                                   aug=False)
        x2.append(a.copy())
        y2.append(b.copy())
        fs2.append(c)
    lkernel, iparam = gpitch.init_models.init_kernel_training(
        y=y2, list_files=lfiles)

    ## Compare FFT kernels and initialization data
    array0 = np.asarray(0.).reshape(-1, 1)
    x_p = np.linspace(-5, 5, 10 * 16000).reshape(-1, 1)
    k_p = []
    for i in range(numf):
        k_p.append(lkernel[1][i].compute_K(x_p, array0))
    Fdata = np.linspace(0., 8000., 16000).reshape(-1, 1)
    Fkernel = np.linspace(0., 8000., 5 * 16000).reshape(-1, 1)
    mplt.plot_fft(Fdata, Fkernel, y2, k_p, numf, iparam)

    ## import the same audio files for training (only `frames` samples each)
    n = frames
    x, y, fs = [], [], []
    for i in range(numf):
        a, b, c = gpitch.readaudio(datadir + lfiles[i], frames=n, aug=True)
        x.append(a.copy())
        y.append(b.copy())
        fs.append(c)

    ## initialize models
    m = []
    # num inducing variables per second for act and comp
    nivps_a, nivps_c = nivps[0], nivps[1]
    nlinfun = gpitch.logistic
    for i in range(numf):
        z = gpitch.init_iv(x=x[i],
                           num_sources=numf,
                           nivps_a=nivps_a,
                           nivps_c=nivps_c,
                           fs=fs[i])
        kern = [[lkernel[0][i]], [lkernel[1][i]]]
        m.append(gpitch.pdgp.Pdgp(x=x[i], y=y[i], z=z, kern=kern))
        m[i].za.fixed = True
        m[i].zc.fixed = True

    ## optimization: first with both inducing sets fixed, then with za free
    for i in range(numf):
        st = time.time()
        m[i].optimize(disp=1, maxiter=maxiter[0])
        m[i].za.fixed = False
        m[i].optimize(disp=1, maxiter=maxiter[1])
        print("model {}, time optimizing {} sec".format(
            i + 1,
            time.time() - st))
        tf.reset_default_graph()

    ## prediction
    m_a, v_a = [], []  # list mean, var activation
    m_c, v_c = [], []  # list mean, var component
    m_s = []  # mean source
    for i in range(numf):
        st = time.time()
        mean_act, var_act = m[i].predict_act(x[i])
        mean_com, var_com = m[i].predict_com(x[i])
        print("model {}, time predicting {}".format(str(i + 1),
                                                    time.time() - st))
        m_s.append(gpitch.logistic(mean_act[0]) * mean_com[0])
        m_a.append(mean_act[0])
        m_c.append(mean_com[0])
        v_a.append(var_act[0])
        v_c.append(var_com[0])
        tf.reset_default_graph()

    ## plots
    for i in range(len(m_s)):
        mplt.plot_training_all(x=x[i],
                               y=y[i],
                               source=m_s[i],
                               m_a=m_a[i],
                               v_a=v_a[i],
                               m_c=m_c[i],
                               v_c=v_c[i],
                               m=m[i],
                               nlinfun=nlinfun)
    mplt.plot_parameters(m)

    ## save models
    if save:
        for i in range(numf):
            m[i].prediction_act = [m_a[i], v_a[i]]
            m[i].prediction_com = [m_c[i], v_c[i]]
            # Remove only a trailing ".wav". str.strip('.wav') would strip any
            # of the characters '.', 'w', 'a', 'v' from BOTH ends of the name.
            fname = lfiles[i]
            stem = fname[:-len('.wav')] if fname.endswith('.wav') else fname
            location = "/import/c4dm-04/alvarado/results/ss_amt/train/logistic/trained_" + stem + ".p"
            # context manager guarantees the file is flushed and closed
            with open(location, "wb") as model_file:
                pickle.dump(m[i], model_file)

    return m
Exemplo n.º 6
0
def sousep(gpu='0',
           ins_idx=0,
           start=0,
           frames=14 * 16000,
           window_size=800,
           minibatch_size=None,
           maxiter=5000,
           nfpc=1,
           use_centers=True,
           visualize_results=True,
           save=True):
    """Fit one model per window of a test mixture and merge the predictions.

    Parameters
    ----------
    gpu : str
        Passed to ``gpitch.init_settings``; also concatenated into a log
        message, so it must be a string.
    ins_idx : int
        Index selecting the instrument name and the entry of
        ``gpitch.lfiles_training`` used as training files.
    start, frames : int
        Forwarded to ``gpitch.readaudio`` when loading the test mixture.
    window_size : int
        Window length in samples; the number of windows is
        ``y.size // window_size``.
    minibatch_size : int or None
        Forwarded to ``gpitch.pdgp.Pdgp``.
    maxiter : int
        Iteration budget for each of the two optimisation passes per window.
    nfpc : int
        Forwarded to ``gpitch.get_features``; also scales ``totalnumf``.
    use_centers : bool
        Forwarded to ``gpitch.get_features``.
    visualize_results : bool
        When true, import matplotlib and plot features, predictions, sources.
    save : bool
        When true, write the three separated sources as wav files. The
        per-window models are pickled regardless of this flag.

    Returns
    -------
    tuple
        ``(m, r_final[-1])``: the list of per-window models and the last
        entry of the merged results.
    """

    if visualize_results:
        import matplotlib.pyplot as plt
        plt.rcParams["figure.figsize"] = (16, 4)
        import gpitch.myplots as mplt

    # initialize settings
    sess = gpitch.init_settings(gpu)  # select gpu
    list_inst = ['011PFNOM', '131EGLPM', '311CLNOM',
                 'ALVARADO']  # list of instruments
    inst = list_inst[ins_idx]  # instrument to analyse

    # print specifications of experiment
    print("Analysing file {}, window size {}, ".format(inst, window_size) +
          "GPU " + gpu)
    print("Iterations {}, minibatch size {}".format(maxiter, minibatch_size))

    # directories to load data and save results
    testdata_directory = '/import/c4dm-04/alvarado/datasets/ss_amt/test_data/'
    traindata_directory = '/import/c4dm-04/alvarado/datasets/ss_amt/training_data/'
    save_location = "/import/c4dm-04/alvarado/results/ss_amt/evaluation/sousep/"

    # load test data
    test_file = inst + "_mixture.wav"
    x, y, fs = gpitch.readaudio(testdata_directory + test_file,
                                frames=frames,
                                start=start,
                                scaled=True)
    # Floor division: num_windows feeds range(), so it must stay an int under
    # Python 3 as well as Python 2.
    num_windows = y.size // window_size
    print("Number of windows to analyze {}".format(num_windows))
    xtest, ytest = gpitch.segmented(x, y, window_size=window_size)

    # load train data for getting kernel features
    train_files = gpitch.lfiles_training[ins_idx]
    num_pitches = len(train_files)
    if0 = gpitch.find_ideal_f0(train_files)  # ideal frequency for each pitch

    # init lists to save features
    aux_list = num_pitches * [None]
    xtrain = list(aux_list)
    ytrain = list(aux_list)
    f_center = list(aux_list)
    v_center = list(aux_list)
    f_vec = list(aux_list)
    s_vec = list(aux_list)
    th = list(aux_list)
    frequency = list(aux_list)
    energy = list(aux_list)
    maxh = 25
    totalnumf = maxh * nfpc
    for i in range(num_pitches):
        # load train data; a separate name keeps the test mixture's `fs`
        # intact, since it is needed later when writing the wav files
        xtrain[i], ytrain[i], fs_train = gpitch.readaudio(traindata_directory +
                                                          train_files[i])

        # get kernel features
        f_center[i], v_center[i], f_vec[i], s_vec[i], th[
            i] = gpitch.init_cparam(y=ytrain[i],
                                    fs=fs_train,
                                    maxh=maxh,
                                    ideal_f0=if0[i],
                                    scaled=False)
        frequency[i], energy[i] = gpitch.get_features(f=f_vec[i],
                                                      s=s_vec[i],
                                                      f_centers=f_center[i],
                                                      nfpc=nfpc,
                                                      use_centers=use_centers,
                                                      totalnumf=totalnumf)

    # initialization of models
    z, m, kern = [], [], []
    for i in range(num_windows):
        # init kernel
        kern.append(
            gpitch.init_kern(num_pitches=num_pitches,
                             energy=energy,
                             frequency=frequency))

        # init inducing variables
        z.append(
            gpitch.init_liv(x=xtest[i].copy(),
                            y=ytest[i].copy(),
                            num_sources=num_pitches)[0])

        # init model
        m.append(
            gpitch.pdgp.Pdgp(x=xtest[i],
                             y=ytest[i],
                             z=z[i],
                             kern=kern[i],
                             minibatch_size=minibatch_size))
        m[i].za.fixed = True
        m[i].zc.fixed = True
        m[i].likelihood.variance = 1.

    # optimization
    results = []
    for i in range(num_windows):
        start_time = time.time()
        # first pass with the initial likelihood variance; second pass with
        # the kernels fixed and a much smaller likelihood variance
        m[i].optimize(disp=1, maxiter=maxiter)
        m[i].kern_act.fixed = True
        m[i].kern_com.fixed = True
        m[i].likelihood.variance = 0.000001
        m[i].optimize(disp=1, maxiter=maxiter)

        # compute prediction
        results.append(
            gpitch.pdgp.predict_windowed(model=m[i],
                                         xnew=xtest[i],
                                         ws=window_size))
        m[i].save_prediction = list(results[i])
        print("Time optimizing and predicting {} secs".format(time.time() -
                                                              start_time))

        # save models (done regardless of the `save` flag)
        if num_windows == 1:
            model_path = save_location + "models/" + inst + "_full_window.p"
        else:
            model_path = (save_location + "models/" + inst + "_window_" +
                          str(i + 1) + ".p")
        # context manager guarantees the file is flushed and closed
        with open(model_path, "wb") as model_file:
            pickle.dump(m[i], model_file)

        # reset tensorflow graph
        tf.reset_default_graph()

    # merge results
    results_merged = gpitch.window_overlap.merge_all(results)
    x_final, y_final, r_final = gpitch.window_overlap.get_results_arrays_noov(
        x=xtest, y=ytest, results=results_merged, window_size=window_size)

    # save wav files
    pitch_name = ['C', 'E', 'G']
    if save:
        for i in range(3):
            name = inst + "_" + pitch_name[i] + "_part.wav"
            print(name)
            # peak-normalise each source before writing
            aux = r_final[-1][i] / np.max(np.abs(r_final[-1][i]))
            soundfile.write(save_location + name, aux, fs)

    # visualize results
    if visualize_results:

        # plot spectral representation of training data and selected features
        plt.figure(figsize=(16, 9))
        for i in range(num_pitches):
            plt.subplot(3, 1, i + 1)
            plt.plot(f_vec[i], s_vec[i] / np.max(s_vec[i]), 'xC0')
            plt.plot(frequency[i], energy[i] / np.max(energy[i]), 'sC1')
            plt.plot(f_center[i], v_center[i] / np.max(v_center[i]), 'vC2')
            plt.legend([
                "Spetral density data", "Features selected",
                "Frequency centers"
            ])

        # plot prediction components and activations
        plt.figure(figsize=(16, 9))
        for i in range(num_windows):
            m_a, v_a, m_c, v_c, esource = m[i].save_prediction
            for j in range(num_pitches):
                plt.subplot(3, 2, 2 * (j + 1) - 1)
                mplt.plot_predict(xtest[i],
                                  m_a[j],
                                  v_a[j],
                                  m[i].za[j].value,
                                  plot_z=True,
                                  latent=True,
                                  plot_latent=False)

                plt.subplot(3, 2, 2 * (j + 1))
                mplt.plot_predict(xtest[i],
                                  m_c[j],
                                  v_c[j],
                                  m[i].zc[j].value,
                                  plot_z=False)

        # plot sources
        plt.figure(figsize=(16, 9))
        gpitch.window_overlap.plot_sources(x_final, y_final, r_final[-1])
    sess.close()
    return m, r_final[-1]