import os
import pickle
import random

import numpy as np
from scipy.stats import pearsonr, spearmanr

# Project-local modules. `strf2avgvec` and `avgvec2strfavg` are the
# repository's STRF-averaging helpers and are assumed to be importable
# alongside these.
import load
import pca
import training


def compute_representations():
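    """Compute, reduce, and normalise every (timbre space, representation) pair.

    For each timbre space in the database and each audio representation, the
    per-sound features are reduced/flattened into an (n_features, n_sounds)
    matrix and written under <log_foldername>/input_data/. A second pass then
    rescales all matrices of a given representation by a single shared norm.
    """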
    args = {
        'timbre_spaces': sorted(load.database().keys()),
        'audio_representations': [
            'auditory_spectrum', 'fourier_spectrum', 'auditory_strf',
            'fourier_strf', 'auditory_spectrogram', 'fourier_spectrogram',
            'auditory_mps', 'fourier_mps'
        ],
        'log_foldername': './outs/ts_crossval',
        'audio_args': {
            'resampling_fs': 16000,
            'duration': 0.25,
            'duration_cut_decay': 0.05
        }
    }
    log_foldername = args['log_foldername']
    print('--processing')
    for tsp in args['timbre_spaces']:
        print('Processing', tsp)
        os.makedirs(log_foldername + '/input_data', exist_ok=True)
        for rs in args['audio_representations']:
            aud_repres = load.timbrespace_features(tsp, representations=[rs], audio_args=args['audio_args'])[rs]
            tab_red = []
            rs_type = rs.split('_')[-1]
            mapping = []
            variances = []
            if rs_type == 'strf':
                n_components = 1
                for i in range(len(aud_repres)):
                    strf_reduced, mapping_, variances = pca.pca_patil(
                        np.absolute(aud_repres[i]),
                        aud_repres[i].shape[1],
                        n_components=n_components)
                    strf_reduced = strf_reduced.flatten()
                    tab_red.append(strf_reduced)
                    mapping.append(mapping_)
                tab_red = np.transpose(np.asarray(tab_red))
            elif rs_type == 'spectrogram' or rs_type == 'mps':
                for i in range(len(aud_repres)):
                    tab_red.append(aud_repres[i].flatten())
                tab_red = np.transpose(np.asarray(tab_red))
            elif rs_type == 'spectrum':
                for i in range(len(aud_repres)):
                    tab_red.append(aud_repres[i])
                tab_red = np.transpose(np.asarray(tab_red))
            np.savetxt(log_foldername+'/input_data/{}_{}_input_data.txt'.format(tsp, rs), tab_red)
    print('--normalising')
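    # Second pass: normalise with one shared scale per representation. First
    # collect each timbre space's mean max-abs column norm, then divide every
    # matrix by the mean of those norms so all timbre spaces share a scale.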
    for rs in args['audio_representations']:
        print('Processing', rs)
        normas = []
        for tsp in args['timbre_spaces']:
            data = np.loadtxt(log_foldername+'/input_data/{}_{}_input_data.txt'.format(tsp, rs))
            normas.append(np.mean(np.max(np.abs(data), axis=0)))
        for tsp in args['timbre_spaces']:
            data = np.loadtxt(log_foldername+'/input_data/{}_{}_input_data.txt'.format(tsp, rs))
            data = data / np.mean(normas)
            # print('  save final data {} {:.5f}'.format(data.shape, np.mean(normas)))
            np.savetxt(log_foldername+'/input_data/{}_{}_input_data.txt'.format(tsp, rs), data)
def run_one_tsp_rs(tsp, rs, args):
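    """Fit the kernel model for one (timbre space, representation) pair.

    Note that the `rs` argument is immediately overwritten: the representation
    name is re-derived from the optimisation log folder, which is expected to
    be named '<representation>-<suffix>'. Also note that `variances` is
    reassigned (not appended) per sound, so only the last sound's PCA
    variances end up in the dump.
    """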
    rslog_foldername = args['optim_args']['log_foldername']
    rs = rslog_foldername.split('/')[-1].split('-')[0]
    dissimil_mat = load.timbrespace_dismatrix(tsp, load.database())
    aud_repres = load.timbrespace_features(tsp,
                                           representations=[rs],
                                           audio_args=args['audio_args'])[rs]
    tab_red = []
    rs_type = rs.split('_')[-1]
    mapping = []
    variances = []
    if rs_type == 'strf':
        n_components = 1
        for i in range(len(aud_repres)):
            strf_reduced, mapping_, variances = pca.pca_patil(
                np.absolute(aud_repres[i]),
                aud_repres[i].shape[1],
                n_components=n_components)
            strf_reduced = strf_reduced.flatten()
            tab_red.append(strf_reduced)
            mapping.append(mapping_)
        tab_red = np.transpose(np.asarray(tab_red))
    elif rs_type == 'spectrogram' or rs_type == 'mps':
        for i in range(len(aud_repres)):
            tab_red.append(aud_repres[i].flatten())
        tab_red = np.transpose(np.asarray(tab_red))
    elif rs_type == 'spectrum':
        for i in range(len(aud_repres)):
            tab_red.append(aud_repres[i])
        tab_red = np.transpose(np.asarray(tab_red))
    pickle.dump(
        {
            'data_repres': aud_repres,
            'data_proj': tab_red,
            'mapping': mapping,
            'variances': variances,
            'dissimilarities': dissimil_mat,
            'audio_args': args['audio_args']
        }, open(os.path.join(rslog_foldername, 'dataset.pkl'), 'wb'))
    print('  data dimension:', tab_red.shape)
    print('* normalizing')
    tab_red = tab_red / np.mean(np.max(np.abs(tab_red), axis=0))
    correlations, _ = training.kernel_optim_lbfgs_log(tab_red, dissimil_mat,
                                                      **args['optim_args'])
def run_one_tsp_rs(tsp, rs, args):
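    """Variant of run_one_tsp_rs above, extended with args['reduce_strf'].

    Supported STRF reductions: 'pca', 'avg_time',
    'avg_time_avg_scale_rate_freq', 'avg_time_avg_freq', 'avg_time_avg_scale',
    'avg_time_avg_rate', and 'avg_time_avg_SRSFFR'; any other value falls back
    to flattening the raw STRF. As above, `rs` is re-derived from the
    optimisation log folder name.
    """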
    rslog_foldername = args['optim_args']['log_foldername']
    rs = rslog_foldername.split('/')[-1].split('-')[0]
    dissimil_mat = load.timbrespace_dismatrix(tsp, load.database())
    aud_repres = load.timbrespace_features(tsp,
                                           representations=[rs],
                                           audio_args=args['audio_args'])[rs]
    tab_red = []
    rs_type = rs.split('_')[-1]
    mapping = []
    variances = []
    reduce_strf = args['reduce_strf']
    if rs_type == 'strf':
        if reduce_strf == 'pca':
            n_components = 1
            for i in range(len(aud_repres)):
                strf_reduced, mapping_, variances = pca.pca_patil(
                    np.absolute(aud_repres[i]),
                    aud_repres[i].shape[1],
                    n_components=n_components)
                strf_reduced = strf_reduced.flatten()
                tab_red.append(strf_reduced)
                mapping.append(mapping_)
            tab_red = np.transpose(np.asarray(tab_red))
        elif reduce_strf == 'avg_time':
            for i in range(len(aud_repres)):
                strf_avgTime = np.mean(np.abs(aud_repres[i]), axis=0)
                freqVec = [0, 14, 28, 42, 56, 71, 85, 99, 113, 127]
                rateVec = [0, 3, 5, 7, 10, 11, 14, 16, 18, 21]
                scaleVec = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                # decimate to a 10x10x10 (freq, scale, rate) grid
                indexedTab = strf_avgTime[np.ix_(freqVec, scaleVec, rateVec)]
                tab_red.append(indexedTab.flatten())
                # [0:128:12,0:11,0:22:2]
                # tab_red.append(strf_avgTime[0:128:12,0:11,0:22:2].flatten()) # decimation here
            tab_red = np.transpose(np.asarray(tab_red))
        elif reduce_strf == 'avg_time_avg_scale_rate_freq':
            for i in range(len(aud_repres)):
                tab_red.append(strf2avgvec(aud_repres[i]).flatten())
            tab_red = np.transpose(np.asarray(tab_red))
        elif reduce_strf == 'avg_time_avg_freq':
            for i in range(len(aud_repres)):
                strf_avgTime = np.mean(np.abs(aud_repres[i]), axis=0)
                freqVec = [0, 14, 28, 42, 56, 71, 85, 99, 113, 127]
                rateVec = [0, 3, 5, 7, 10, 11, 14, 16, 18, 21]
                scaleVec = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                indexedTab = strf_avgTime[np.ix_(freqVec, scaleVec, rateVec)]
                strf_scale_rate, strf_freq_rate, strf_freq_scale = avgvec2strfavg(
                    strf2avgvec(indexedTab),
                    nbChannels=10,
                    nbRates=10,
                    nbScales=10)
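                # avgvec2strfavg yields the three 2-D marginals of the STRF
                # (scale x rate, freq x rate, freq x scale); this branch keeps
                # only the scale-rate view, i.e. frequency is averaged out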

                tab_red.append(strf_scale_rate.flatten())
                # strf_scale_rate, strf_freq_rate, strf_freq_scale = avgvec2strfavg(strf2avgvec(aud_repres[i]))
                # plotStrfavg(strf_scale_rate, strf_freq_rate, strf_freq_scale)
                # tab_red.append(np.absolute(np.mean(aud_repres[i], axis=0)).flatten())
            tab_red = np.transpose(np.asarray(tab_red))
        elif reduce_strf == 'avg_time_avg_scale':
            for i in range(len(aud_repres)):
                strf_avgTime = np.mean(np.abs(aud_repres[i]), axis=0)
                freqVec = [0, 14, 28, 42, 56, 71, 85, 99, 113, 127]
                rateVec = [0, 3, 5, 7, 10, 11, 14, 16, 18, 21]
                scaleVec = [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]
                indexedTab = strf_avgTime[np.ix_(freqVec, scaleVec, rateVec)]
                strf_scale_rate, strf_freq_rate, strf_freq_scale = avgvec2strfavg(
                    strf2avgvec(indexedTab),
                    nbChannels=10,
                    nbRates=10,
                    nbScales=10)

                tab_red.append(strf_freq_rate.flatten())
            tab_red = np.transpose(np.asarray(tab_red))
        elif reduce_strf == 'avg_time_avg_rate':
            for i in range(len(aud_repres)):
                strf_avgTime = np.mean(np.abs(aud_repres[i]), axis=0)
                freqVec = np.array([0, 14, 28, 42, 56, 71, 85, 99, 113, 127])
                rateVec = np.array([0, 3, 5, 7, 10, 11, 14, 16, 18, 21])
                scaleVec = np.array([0, 2, 3, 4, 5, 6, 7, 8, 9, 10])
                # np.ix_ builds the full 10x10x10 decimation grid, as in the
                # branches above; plain [freqVec, scaleVec, rateVec] indexing
                # would select only a 1-D diagonal of 10 points
                strf_scale_rate, strf_freq_rate, strf_freq_scale = avgvec2strfavg(
                    strf2avgvec(strf_avgTime[np.ix_(freqVec, scaleVec, rateVec)]),
                    nbChannels=10,
                    nbRates=10,
                    nbScales=10)

                tab_red.append(strf_freq_scale.flatten())

            tab_red = np.transpose(np.asarray(tab_red))
        elif reduce_strf == 'avg_time_avg_SRSFFR':
            for i in range(len(aud_repres)):
                strf_scale_rate, strf_freq_rate, strf_freq_scale = avgvec2strfavg(
                    strf2avgvec(aud_repres[i]))
                concat_tab = np.concatenate(
                    (strf_scale_rate, strf_freq_rate, strf_freq_scale),
                    axis=None)
                tab_red.append(concat_tab.flatten())
            tab_red = np.transpose(np.asarray(tab_red))
        else:
            for i in range(len(aud_repres)):
                tab_red.append(aud_repres[i].flatten())
            tab_red = np.transpose(np.asarray(tab_red))
    elif rs_type == 'spectrogram' or rs_type == 'mps':
        for i in range(len(aud_repres)):
            tab_red.append(aud_repres[i].flatten())
        tab_red = np.transpose(np.asarray(tab_red))
    elif rs_type == 'spectrum':
        for i in range(len(aud_repres)):
            tab_red.append(aud_repres[i])
        tab_red = np.transpose(np.asarray(tab_red))
    # print(tab_red)
    # print(aud_repres)
    # print(mapping)
    # print(variances)
    # print(dissimil_mat)
    print(args['audio_args'])
    # pickle.dump({
    #     'data_repres': aud_repres,
    #     'data_proj': tab_red,
    #     'mapping': mapping,
    #     'variances': variances,
    #     'dissimilarities': dissimil_mat,
    #     'audio_args': args['audio_args']
    # }, open(os.path.join(rslog_foldername, 'dataset.pkl'), 'wb'))
    print('  data dimension:', tab_red.shape)
    print('* normalizing')
    norm = np.mean(np.max(np.abs(tab_red), axis=0))
    print('  norm:', norm)
    tab_red = tab_red / norm
    correlations, sigmas__ = training.kernel_optim_lbfgs_log(
        tab_red, dissimil_mat, **args['optim_args'])
    # strf_scale_rate, strf_freq_rate, strf_freq_scale = avgvec2strfavg(sigmas__)
    # plotStrfavg(strf_scale_rate, strf_freq_rate, strf_freq_scale,show='false',figname=tsp+'_'+str(correlations[-1]))
    pickle.dump(
        {
            'sigmas': sigmas__,
            'representations': tab_red,
            'dissimilarities': dissimil_mat,
            'audio_args': args['audio_args']
        },
        open(
            os.path.join(args['log_foldername'],
                         'resultsOptims_' + rs_type + '_' + reduce_strf + '_' +
                         tsp + '.pkl'), 'wb'))
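
# A minimal invocation sketch for run_one_tsp_rs (hypothetical paths and
# timbre-space name; a real entry point would build `args` the way
# compute_representations() does). The optimisation log folder must be named
# '<representation>-<suffix>', since `rs` is re-derived from it:
#
#   args = {
#       'log_foldername': './outs/ts_crossval/',
#       'reduce_strf': 'avg_time',
#       'audio_args': {'resampling_fs': 16000, 'duration': 0.25,
#                      'duration_cut_decay': 0.05},
#       'optim_args': {'log_foldername': './outs/ts_crossval/auditory_strf-run1'},
#   }
#   run_one_tsp_rs('<timbre_space_name>', 'auditory_strf', args)
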
def run_one_tsp_rs_crossval(tsp, rs, args):
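    """Leave-one-out cross-validation over the sounds of one timbre space.

    Each fold holds out a single sound: sigmas are optimised on the remaining
    dissimilarities, then the correlation between kernel distances and the
    held-out sound's dissimilarities is evaluated on the left-out column.
    """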
    rslog_foldername = args['optim_args']['log_foldername']
    rs = rslog_foldername.split('/')[-1].split('-')[0]
    dissimil_mat = load.timbrespace_dismatrix(tsp, load.database())
    aud_repres = load.timbrespace_features(tsp,
                                           representations=[rs],
                                           audio_args=args['audio_args'])[rs]
    tab_red = []
    rs_type = rs.split('_')[-1]
    mapping = []
    variances = []
    reduce_strf = args['reduce_strf']
    if rs_type == 'strf':
        if reduce_strf == 'pca':
            n_components = 1
            for i in range(len(aud_repres)):
                strf_reduced, mapping_, variances = pca.pca_patil(
                    np.absolute(aud_repres[i]),
                    aud_repres[i].shape[1],
                    n_components=n_components)
                strf_reduced = strf_reduced.flatten()
                tab_red.append(strf_reduced)
                mapping.append(mapping_)
            tab_red = np.transpose(np.asarray(tab_red))
        elif reduce_strf == 'avg_time':
            for i in range(len(aud_repres)):
                tab_red.append(
                    np.absolute(np.mean(aud_repres[i], axis=0)).flatten())
            tab_red = np.transpose(np.asarray(tab_red))
        else:
            for i in range(len(aud_repres)):
                tab_red.append(aud_repres[i].flatten())
            tab_red = np.transpose(np.asarray(tab_red))
    elif rs_type == 'spectrogram' or rs_type == 'mps':
        for i in range(len(aud_repres)):
            tab_red.append(aud_repres[i].flatten())
        tab_red = np.transpose(np.asarray(tab_red))
    elif rs_type == 'spectrum':
        for i in range(len(aud_repres)):
            tab_red.append(aud_repres[i])
        tab_red = np.transpose(np.asarray(tab_red))
    pickle.dump(
        {
            'data_repres': aud_repres,
            'data_proj': tab_red,
            'mapping': mapping,
            'variances': variances,
            'dissimilarities': dissimil_mat,
            'audio_args': args['audio_args']
        }, open(os.path.join(rslog_foldername, 'dataset.pkl'), 'wb'))

    print('  data dimension:', tab_red.shape)
    print('* normalizing')
    tab_red = tab_red / np.mean(np.max(np.abs(tab_red), axis=0))
    ninstrus = tab_red.shape[1]
    correlations_training = []
    sigmas_training = []
    correlations_testing = []
    correlations_testing_pearson = []
    correlations_testing_spearman = []
    sigmas_testing = []
    distances_testing = []
    target_testing = []

    print('* cross-validation tests')
    org_fname = args['optim_args']['log_foldername']
    for fold in range(ninstrus):
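        # fold = index of the single held-out sound (leave-one-out)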
        # training data
        train_idx = [i for i in range(tab_red.shape[1]) if i != fold]
        sorted_train_idx = sorted(train_idx)
        input_data_training = tab_red[:, train_idx]
        target_data_training = dissimil_mat[train_idx, :]
        target_data_training = target_data_training[:, train_idx]
        # testing data
        test_idx = [fold]
        input_data_testing = tab_red[:, test_idx[0]]
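        # gather the dissimilarities between the held-out sound and every
        # other one; the matrix is stored triangularly, so row/column order
        # depends on which index is smaller (hence the two branches below)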
        target_data_testing = np.zeros((ninstrus - 1, 1))
        cpt_i = 0
        for i in range(ninstrus):
            if i > test_idx[0]:
                target_data_testing[cpt_i] = dissimil_mat[test_idx[0], i]
                cpt_i += 1
            elif i < test_idx[0]:
                target_data_testing[cpt_i] = dissimil_mat[i, test_idx[0]]
                cpt_i += 1
        target_testing.append(target_data_testing.reshape(1, -1))
        mean_target_test = np.mean(target_data_testing)
        std_target_test = np.std(target_data_testing)

        # optimisation on fold i
        print('* Fold {} - {} {}'.format(fold + 1, train_idx, test_idx))
        args['optim_args']['allow_resume'] = False
        args['optim_args']['log_foldername'] = org_fname + '/fold{}'.format(
            fold + 1)
        os.makedirs(args['optim_args']['log_foldername'], exist_ok=True)
        correlations, sigmas = training.kernel_optim_lbfgs_log(
            input_data=input_data_training,
            target_data=target_data_training,
            test_data=(input_data_testing, target_data_testing),
            **args['optim_args'])
        correlations_training.append(correlations)
        sigmas_training.append(sigmas)

        distances = np.zeros((ninstrus - 1, 1))
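        # negative sigma-weighted squared distances between the held-out sound
        # and each training sound; correlated below against the target
        # dissimilarities via an explicit Pearson ratio (Jn / Jd)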
        for i in range(ninstrus - 1):
            distances[i, 0] = -np.sum(
                np.power(
                    np.divide(
                        tab_red[:, test_idx[0]] -
                        tab_red[:, sorted_train_idx[i]],
                        (sigmas + np.finfo(float).eps)), 2))
        distances_testing.append(distances.reshape(1, -1))
        mean_distances = np.mean(distances)
        stddev_distances = np.std(distances)
        Jn = np.sum(
            np.multiply(distances - mean_distances,
                        target_data_testing - mean_target_test))
        Jd = std_target_test * stddev_distances * (ninstrus - 1)

        correlations_testing.append(Jn / Jd)
        sigmas_testing.append(sigmas)
        # flatten to 1-D: scipy's pearsonr and spearmanr expect vector inputs
        pearsr = pearsonr(distances[:, 0], target_data_testing[:, 0])
        correlations_testing_pearson.append(pearsr)
        spear_rho, spear_p_val = spearmanr(distances[:, 0],
                                           target_data_testing[:, 0])
        spear = [spear_rho, spear_p_val]
        correlations_testing_spearman.append(spear)

        print('\tFold={} test_corr={:.3f} (PE {} | SP {}) (train_corr={:.3f})'.
              format(fold + 1, Jn / Jd, pearsr, spear, correlations[-1]))
        pickle.dump(
            {
                'correlations_training': correlations_training,
                'sigmas_training': sigmas_training,
                'correlations_testing': correlations_testing,
                'sigmas_testing': sigmas_testing,
                'correlations_testing_pearson': correlations_testing_pearson,
                'correlations_testing_spearman': correlations_testing_spearman,
                'distances_testing': distances_testing,
                'target_data_testing': target_testing
            },
            open(os.path.join(rslog_foldername, 'crossval_intrats_res.pkl'),
                 'wb'))

    mean_correlations_training = np.mean(
        [c[-1] for c in correlations_training])

    distances_testing = np.squeeze(np.array(distances_testing)).reshape(-1, 1)
    target_testing = np.squeeze(np.array(target_testing)).reshape(-1, 1)

    print(
        '\tAll Folds: mean_training_corr={} | corr_testing_pears={} | corr_testing_spear={}'
        .format(mean_correlations_training,
                pearsonr(distances_testing[:, 0], target_testing[:, 0]),
                spearmanr(distances_testing[:, 0], target_testing[:, 0])))
def run_one_tsp_rs_crossval(tsp, rs, args):
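    """Random-split variant of run_one_tsp_rs_crossval: 20 folds, 2/3 train.

    Per fold, sigmas are optimised on the training dissimilarities, then the
    test correlation is computed between pairwise kernel values and the
    upper-triangular test dissimilarities.
    """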
    rslog_foldername = args['optim_args']['log_foldername']
    rs = rslog_foldername.split('/')[-1].split('-')[0]
    dissimil_mat = load.timbrespace_dismatrix(tsp, load.database())
    aud_repres = load.timbrespace_features(tsp,
                                           representations=[rs],
                                           audio_args=args['audio_args'])[rs]
    tab_red = []
    rs_type = rs.split('_')[-1]
    mapping = []
    variances = []
    if rs_type == 'strf':
        n_components = 1
        for i in range(len(aud_repres)):
            strf_reduced, mapping_, variances = pca.pca_patil(
                np.absolute(aud_repres[i]),
                aud_repres[i].shape[1],
                n_components=n_components)
            strf_reduced = strf_reduced.flatten()
            tab_red.append(strf_reduced)
            mapping.append(mapping_)
        tab_red = np.transpose(np.asarray(tab_red))
    elif rs_type == 'spectrogram' or rs_type == 'mps':
        for i in range(len(aud_repres)):
            tab_red.append(aud_repres[i].flatten())
        tab_red = np.transpose(np.asarray(tab_red))
    elif rs_type == 'spectrum':
        for i in range(len(aud_repres)):
            tab_red.append(aud_repres[i])
        tab_red = np.transpose(np.asarray(tab_red))
    pickle.dump(
        {
            'data_repres': aud_repres,
            'data_proj': tab_red,
            'mapping': mapping,
            'variances': variances,
            'dissimilarities': dissimil_mat,
            'audio_args': args['audio_args']
        }, open(os.path.join(rslog_foldername, 'dataset.pkl'), 'wb'))
    print('  data dimension:', tab_red.shape)
    print('* normalizing')
    tab_red = tab_red / np.mean(np.max(np.abs(tab_red), axis=0))

    corr_fold = []
    sigmas_fold = []
    print('* cross-validation tests')
    for fold in range(20):

        idx = list(range(tab_red.shape[1]))
        random.shuffle(idx)
        train_idx = idx[:int(2 * len(idx) / 3)]
        test_idx = idx[int(2 * len(idx) / 3):]
        dmat = dissimil_mat[train_idx, :]
        dmat = dmat[:, train_idx]
        print('* Fold {} - {} {}'.format(fold + 1, train_idx, test_idx))
        correlations, sigmas = training.kernel_optim_lbfgs_log(
            tab_red[:, train_idx], dmat, **args['optim_args'])

        # testing
        test_data = tab_red[:, test_idx]
        dmat = dissimil_mat[test_idx, :]
        target_test_data = dmat[:, test_idx]
        idx_triu = np.triu_indices(target_test_data.shape[0], k=1)
        target_test_data_v = target_test_data[idx_triu]
        mean_target_test = np.mean(target_test_data_v)
        std_target_test = np.std(target_test_data_v)
        ninstrus = len(test_idx)
        no_samples = ninstrus * (ninstrus - 1) / 2
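        # pairwise kernel values between test sounds (negative sigma-weighted
        # squared distances); only the upper triangle is compared against the
        # upper triangle of the test dissimilarity matrix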
        kernel = np.zeros((ninstrus, ninstrus))
        for i in range(ninstrus):
            for j in range(i + 1, ninstrus):
                kernel[i, j] = -np.sum(
                    np.power(
                        np.divide(test_data[:, i] - test_data[:, j],
                                  (sigmas + np.finfo(float).eps)), 2))
        kernel_v = kernel[idx_triu]
        mean_kernel = np.mean(kernel_v)
        std_kernel = np.std(kernel_v)
        Jn = np.sum(
            np.multiply(kernel_v - mean_kernel,
                        target_test_data_v - mean_target_test))
        Jd = no_samples * std_target_test * std_kernel
        corr_fold.append(Jn / Jd)
        sigmas_fold.append(sigmas)
        print('\tfold={} corr={}'.format(fold + 1, Jn / Jd))
        pickle.dump({
            'corr_fold': corr_fold,
            'sigmas_fold': sigmas_fold
        }, open(os.path.join(rslog_foldername, 'crossval_res.pkl'), 'wb'))
    print('\tall folds: corr={} ({})'.format(np.mean(corr_fold),
                                             np.std(corr_fold)))