from models.som.SOM import SOM
from models.som.SOMTest import showSom
from oneShotLearning.utility import *

from utils.constants import Constants
from utils.utils import from_csv_with_filenames, from_csv_visual_10classes

# Input CSVs for the 10-class experiment, one per modality.
audio_data_path = os.path.join(
    Constants.DATA_FOLDER, '10classes', 'audio_data_25t.csv')
visual_data_path = os.path.join(
    Constants.DATA_FOLDER, '10classes', 'VisualInputTrainingSet.csv')
# Class ids 0..9; passed to get_random_classes below.
classes = list(range(10))

if __name__ == '__main__':
    print('classes', classes)
    # The audio loader returns a third value that is not needed here.
    a_xs, a_ys, _ = from_csv_with_filenames(audio_data_path)
    v_xs, v_ys = from_csv_visual_10classes(visual_data_path)
    # Parse the labels as integers and shift them down by 1000.
    a_ys = [int(label) - 1000 for label in a_ys]
    v_ys = [int(label) - 1000 for label in v_ys]
    # Each modality gets its own freshly fitted scaler.
    # NOTE(review): if StandardScaler is scikit-learn's (it arrives through
    # the wildcard import), this standardizes each feature to zero mean and
    # unit variance rather than mapping values into a 0-1 range -- confirm.
    a_xs = StandardScaler().fit_transform(a_xs)
    v_xs = StandardScaler().fit_transform(v_xs)
    for i in range(1):
        # The audio split is drawn first, then the visual one.
        (a_xs_train, a_ys_train,
         a_xs_test, a_ys_test) = get_random_classes(a_xs, a_ys, classes, 10, 2)
        (v_xs_train, v_ys_train,
         v_xs_test, v_ys_test) = get_random_classes(v_xs, v_ys, classes, 10, 2)
        # Debugging aids for inspecting the shapes of the training splits:
        # print('shape audio input train', np.shape(a_xs_train))
        # print('shape audio labels train', np.shape(a_ys_train))
        # print('shape visual input train', np.shape(v_xs_train))
        # print('shape visual labels train', np.shape(v_ys_train))
from models.som.SOMTest import showSom
from utils.utils import load_from_pickle
from utils.utils import load_data
from utils.utils import from_csv_with_filenames
from utils.constants import Constants
from sklearn.externals import joblib
import os
import logging
import numpy as np

# Audio feature CSV for the 10-class experiment.
csv_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'audio_data.csv')
# True: restore a previously trained SOM; False: train a new one.
LOAD = True

if __name__ == '__main__':
    logging.info('Loading data')
    xs, ys, filenames = from_csv_with_filenames(csv_path)
    # Parse the labels as integers and shift them down by 1000.
    ys = [int(y) - 1000 for y in ys]
    # All examples are assumed to share the length of the first one.
    vect_size = len(xs[0])
    # The empty last component makes the checkpoint path end with a separator.
    checkpoint_dir = os.path.join(Constants.DATA_FOLDER, '10classes',
                                  'audio_model', '')
    audio_som = SOM(20,
                    30,
                    vect_size,
                    n_iterations=100,
                    checkpoint_dir=checkpoint_dir)
    # Log the action that is actually performed: previously 'Training som'
    # was emitted on the restore path and the training path logged nothing.
    if LOAD:
        logging.info('Restoring trained som')
        audio_som.restore_trained()
    else:
        logging.info('Training som')
        audio_som.train(xs)
    #audio_som.plot_som(xs, ys, plot_name='audio_som.png')
Пример #3
0
    for i, x in enumerate(xs):
        prototype_dict[ys[i]].append(x)
    prototype_dict = {
        k: np.array(prototype)
        for k, prototype in prototype_dict.items()
    }
    for y in set(ys):
        prototype_dict[y] = np.mean(prototype_dict[y], axis=0)
    prototypes = np.asarray(list(prototype_dict.values())).T
    for i, x in enumerate(xs):
        prototype_distance_matrix[ys[i]][:] += np.mean(
            np.absolute(prototypes - x.reshape((-1, 1))), axis=0).T
    print(prototype_distance_matrix)
    plt.matshow(prototype_distance_matrix, cmap=plt.get_cmap('Greys'))
    plt.show()


def examples_distance(xs, i1, i2):
    """Return the norm of the difference between two examples of ``xs``.

    The result is ``np.linalg.norm`` with its default settings applied to
    ``xs[i1] - xs[i2]``, i.e. the Euclidean distance for 1-D examples.

    Args:
        xs: Indexable collection of examples. Each example may be a NumPy
            array or a plain numeric sequence (list/tuple).
        i1: Index of the first example.
        i2: Index of the second example.

    Returns:
        The distance as a float.

    Raises:
        IndexError: If ``i1`` or ``i2`` is out of range for ``xs``.
    """
    # Coerce to arrays so plain Python sequences can be subtracted
    # element-wise; ndarray inputs pass through unchanged.
    first = np.asarray(xs[i1])
    second = np.asarray(xs[i2])
    return np.linalg.norm(first - second)


if __name__ == '__main__':
    # The loader yields three values; all of them are forwarded below.
    xs, ys, filenames = from_csv_with_filenames(CSV_PATH)
    # Parse the labels as integers and shift them down by 1000 (the
    # rationale is documented outside this excerpt).
    ys = [int(raw_label) - 1000 for raw_label in ys]

    average_prototype_distance_matrix(xs, ys, filenames)

    # Ad-hoc check of the distance between two individual examples:
    # i1 = 23
    # i2 = 163
    # d = examples_distance(xs, i1, i2)
    # print(d)
Пример #4
0
    flat_xs = np.array([x.ravel() for x in xs])
    ys = np.array(ys)
    j = 0
    for train_i, test_i in kf.split(flat_xs):
        rbf = SVC()
        linear = LinearSVC()
        print('Fitting RBF...')
        rbf.fit(flat_xs[train_i], ys[train_i])
        print('Fitting linear...')
        linear.fit(flat_xs[train_i], ys[train_i])
        pred = rbf.predict(flat_xs[test_i])
        results_rbf.append(np.average(pred == ys[test_i]))
        pred = linear.predict(flat_xs[test_i])
        results_linear.append(np.average(pred == ys[test_i]))
        print('Fold {}, wrong ones: '.format(j))
        for i, is_correct in enumerate(pred == ys[test_i]):
            if not is_correct:
                print('{}: was predicted {}'
                      .format(filenames[test_i[i]], pred[i]))
        j += 1
    print('SVC RBF: {}; SVC Linear: {}'.format(np.average(results_rbf), np.average(results_linear)))


if __name__ == '__main__':
    # The input is read from the audio CSV, so the message no longer claims
    # that a pickle is being loaded.
    logging.info('Loading data')
    # Earlier pickle-based loader, kept for reference:
    #xs, ys = load_data(os.path.join(Constants.DATA_FOLDER, 'activations.pkl'))
    xs, ys, filenames = from_csv_with_filenames(
        os.path.join(Constants.DATA_FOLDER, '10classes', 'audio_data.csv'))
    train_svc_report_errors(xs, ys, filenames)
Пример #5
0
from models.som.SOM import SOM
from models.som.wordLearningTest import iterativeTraining
from utils.constants import Constants
from utils.utils import from_csv_with_filenames
import os
"""
Train an auditive som, test it alongside the visual one
"""

# Checkpoint locations for the visual and the audio model.
somv_path = os.path.join(
    Constants.DATA_FOLDER, '10classes', 'visual_model')
somu_path = os.path.join(
    Constants.DATA_FOLDER, '10classes', 'audio_model')

# Audio features used to train the audio SOM.
audio_data_path = os.path.join(
    Constants.DATA_FOLDER, '10classes', 'audio_data_40t.csv')

if __name__ == '__main__':
    # Only the examples (xs) are used below; ys and filenames are unused.
    xs, ys, filenames = from_csv_with_filenames(audio_data_path)
    # All examples are assumed to share the length of the first one.
    vect_size = len(xs[0])
    audio_som = SOM(
        20, 30, vect_size,
        n_iterations=100,
        checkpoint_dir=somu_path,
    )
    audio_som.train(xs)
    # Hand both checkpoint locations to the joint training routine.
    iterativeTraining(somv_path, somu_path)
    # NOTE(review): `parser` is not created anywhere in the visible part of
    # this file, and `args.classes100` / `args.csv_path` (read below) have no
    # matching add_argument call here. This looks like the tail of a separate
    # argparse-driven script -- confirm where the parser is set up.
    parser.add_argument(
        '--is-audio',
        action='store_true',
        default=False,
        help=
        'Specify whether the csv contains audio representations, as the loading functions are different.'
    )
    # --cluster switches the analysis that is run at the end of the script.
    parser.add_argument('--cluster', action='store_true', default=False)

    args = parser.parse_args()

    # Pick the loader from the class count and the modality. The audio loader
    # returns a third value, which is discarded.
    if not args.classes100:
        num_classes = 10
        if not args.is_audio:
            xs, ys = from_csv_visual_10classes(args.csv_path)
        else:
            xs, ys, _ = from_csv_with_filenames(args.csv_path)
        # Labels are parsed as integers and shifted down by 1000.
        ys = [int(y) - 1000
              for y in ys]  # see comment in average_prototype_distance_matrix
    else:
        num_classes = 100
        # NOTE(review): unlike the 10-class branch, labels are left as loaded
        # here (no int(y) - 1000 conversion) -- confirm this is intended.
        if not args.is_audio:
            xs, ys = from_csv_visual_100classes(args.csv_path)
        else:
            xs, ys, _ = from_csv_with_filenames(args.csv_path)

    # Default analysis is the prototype distance matrix; --cluster selects
    # cluster_compactness, the only consumer of num_classes.
    if not args.cluster:
        average_prototype_distance_matrix(xs, ys)
    else:
        cluster_compactness(xs, ys, num_classes)