from models.som.SOM import SOM
from models.som.SOMTest import showSom
from oneShotLearning.utility import *
from utils.constants import Constants
from utils.utils import from_csv_with_filenames, from_csv_visual_10classes

# Locations of the 10-class audio and visual representation CSV files.
# NOTE(review): `os`, `StandardScaler` and `get_random_classes` are not
# imported by name in this file; presumably they come in through the wildcard
# import from oneShotLearning.utility above -- confirm.
audio_data_path = os.path.join(Constants.DATA_FOLDER,
                               '10classes',
                               'audio_data_25t.csv')
visual_data_path = os.path.join(Constants.DATA_FOLDER,
                                '10classes',
                                'VisualInputTrainingSet.csv')

# Class ids 0..9.
classes = list(range(0, 10))

if __name__ == '__main__':
    print('classes', classes)
    # The audio loader also returns a third value, which is discarded here.
    a_xs, a_ys, _ = from_csv_with_filenames(audio_data_path)
    v_xs, v_ys = from_csv_visual_10classes(visual_data_path)
    # Convert every label to int and shift it down by 1000.
    a_ys = [int(y) - 1000 for y in a_ys]
    v_ys = [int(y) - 1000 for y in v_ys]
    # Rescale each modality with its own, independently fitted scaler.
    # NOTE(review): this comment used to read "scale data to 0-1 range"; if
    # StandardScaler is scikit-learn's, it standardizes features to zero mean
    # and unit variance rather than min-max scaling them -- confirm intent.
    a_xs = StandardScaler().fit_transform(a_xs)
    v_xs = StandardScaler().fit_transform(v_xs)
    # Single pass; the loop index is not used in the visible code.
    for i in range(0, 1):
        # Audio and visual splits are drawn by two separate calls.
        # NOTE(review): presumably 10 and 2 are per-class train/test example
        # counts -- verify against get_random_classes.
        a_xs_train, a_ys_train, a_xs_test, a_ys_test = get_random_classes(
            a_xs, a_ys, classes, 10, 2)
        v_xs_train, v_ys_train, v_xs_test, v_ys_test = get_random_classes(
            v_xs, v_ys, classes, 10, 2)
        # print('shape audio input train', np.shape(a_xs_train))
        # print('shape audio labels train', np.shape(a_ys_train))
        # print('shape visual input train', np.shape(v_xs_train))
        # print('shape visual labels train', np.shape(v_ys_train))
# SOM is instantiated below, so it is imported explicitly here (a repeated
# import is harmless if an earlier part of the file already provides it).
from models.som.SOM import SOM
from models.som.SOMTest import showSom
from utils.utils import load_from_pickle
from utils.utils import load_data
from utils.utils import from_csv_with_filenames
from utils.constants import Constants
# NOTE(review): sklearn.externals.joblib was deprecated in scikit-learn 0.21
# and removed in 0.23, and joblib is not used in the visible code -- consider
# dropping or replacing this import.
from sklearn.externals import joblib
import os
import logging
import numpy as np

# CSV holding the 10-class audio representations.
csv_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'audio_data.csv')
# True: restore a previously trained SOM; False: train one from scratch.
LOAD = True

if __name__ == '__main__':
    # NOTE(review): logging is never configured in the visible code, so with
    # the default root level (WARNING) these info messages are not displayed.
    logging.info('Loading data')
    xs, ys, filenames = from_csv_with_filenames(csv_path)
    # Convert every label to int and shift it down by 1000.
    ys = [int(y) - 1000 for y in ys]
    vect_size = len(xs[0])
    # The trailing '' makes os.path.join end the checkpoint path with a
    # path separator.
    checkpoint_dir = os.path.join(Constants.DATA_FOLDER, '10classes',
                                  'audio_model', '')
    audio_som = SOM(20, 30, vect_size, n_iterations=100,
                    checkpoint_dir=checkpoint_dir)
    if LOAD:
        # Previously this branch logged 'Training som' although it only
        # restores a model; each branch now logs what it actually does.
        logging.info('Restoring trained som')
        audio_som.restore_trained()
    else:
        logging.info('Training som')
        audio_som.train(xs)
    #audio_som.plot_som(xs, ys, plot_name='audio_som.png')
# NOTE(review): everything down to plt.show() is the tail of a function whose
# `def` line and setup (prototype_dict, prototype_distance_matrix) lie above
# this chunk; it is reproduced unchanged, indented as a function body.
    # Collect every example under its label.
    for i, x in enumerate(xs):
        prototype_dict[ys[i]].append(x)
    # Turn each label's list of examples into an array.
    prototype_dict = {
        k: np.array(prototype)
        for k, prototype in prototype_dict.items()
    }
    # Replace each label's examples by their mean along axis 0.
    for y in set(ys):
        prototype_dict[y] = np.mean(prototype_dict[y], axis=0)
    # Transposed so that each dict entry becomes one column, in dict
    # iteration order.
    prototypes = np.asarray(list(prototype_dict.values())).T
    # For every example, add the mean absolute difference between it and each
    # prototype column to the row indexed by the example's label.
    # NOTE(review): rows are indexed by label value while columns follow dict
    # iteration order -- confirm the two orderings agree. The accumulated sums
    # are not divided by the per-label example count in the visible code.
    for i, x in enumerate(xs):
        prototype_distance_matrix[ys[i]][:] += np.mean(
            np.absolute(prototypes - x.reshape((-1, 1))), axis=0).T
    print(prototype_distance_matrix)
    plt.matshow(prototype_distance_matrix, cmap=plt.get_cmap('Greys'))
    plt.show()


def examples_distance(xs, i1, i2):
    """Return the norm of the difference between two examples.

    Args:
        xs: indexable collection of examples that support subtraction.
        i1: index of the first example.
        i2: index of the second example.

    Returns:
        np.linalg.norm(xs[i1] - xs[i2]); with NumPy's default `ord` this is
        the Euclidean norm when the examples are 1-D vectors.
    """
    return np.linalg.norm(xs[i1] - xs[i2])


if __name__ == '__main__':
    xs, ys, filenames = from_csv_with_filenames(CSV_PATH)
    # Convert every label to int and shift it down by 1000.
    ys = [int(y) - 1000 for y in ys] # see comment above
    average_prototype_distance_matrix(xs, ys, filenames)
    #i1 = 23
    #i2 = 163
    #d = examples_distance(xs, i1, i2)
    #print(d)
# NOTE(review): everything down to the final summary print is the tail of a
# function whose `def` line and setup (kf, results_rbf, results_linear) lie
# above this chunk; it is reproduced unchanged, indented as a function body.
    # Flatten every example to 1-D so the classifiers get a 2-D input array.
    flat_xs = np.array([x.ravel() for x in xs])
    ys = np.array(ys)
    # Fold counter, used only in the printed report.
    j = 0
    for train_i, test_i in kf.split(flat_xs):
        # Fresh classifiers are created for every fold.
        rbf = SVC()
        linear = LinearSVC()
        print('Fitting RBF...')
        rbf.fit(flat_xs[train_i], ys[train_i])
        print('Fitting linear...')
        linear.fit(flat_xs[train_i], ys[train_i])
        # Per-fold accuracy: fraction of test predictions equal to the label.
        pred = rbf.predict(flat_xs[test_i])
        results_rbf.append(np.average(pred == ys[test_i]))
        pred = linear.predict(flat_xs[test_i])
        results_linear.append(np.average(pred == ys[test_i]))
        # NOTE(review): `pred` now holds the linear classifier's output, so
        # the misclassification listing below covers LinearSVC only -- confirm
        # that is intended.
        print('Fold {}, wrong ones: '.format(j))
        for i, is_correct in enumerate(pred == ys[test_i]):
            if not is_correct:
                # test_i[i] maps the position within the fold back to the
                # index in the full dataset.
                print('{}: was predicted {}' .format(filenames[test_i[i]], pred[i]))
        j += 1
    # Accuracy averaged over all folds.
    print('SVC RBF: {}; SVC Linear: {}'.format(np.average(results_rbf),
                                               np.average(results_linear)))


if __name__ == '__main__':
    # NOTE(review): the message says "pickle", but the data is read from a
    # CSV below (the pickle loader is commented out).
    logging.info('Loading pickle')
    #xs, ys = load_data(os.path.join(Constants.DATA_FOLDER, 'activations.pkl'))
    xs, ys, filenames = from_csv_with_filenames(
        os.path.join(Constants.DATA_FOLDER, '10classes', 'audio_data.csv')
    )
    train_svc_report_errors(xs, ys, filenames)
"""Train an auditory SOM, then test it alongside the visual one."""

import os

from models.som.SOM import SOM
from models.som.wordLearningTest import iterativeTraining
from utils.constants import Constants
from utils.utils import from_csv_with_filenames

# Checkpoint locations of the visual and the audio SOM.
somv_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'visual_model')
somu_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'audio_model')
# CSV holding the audio representations used for training.
audio_data_path = os.path.join(Constants.DATA_FOLDER,
                               '10classes',
                               'audio_data_40t.csv')

if __name__ == '__main__':
    # Only the input vectors are needed here; the two other values returned
    # by the loader are discarded.
    xs, _, _ = from_csv_with_filenames(audio_data_path)
    vect_size = len(xs[0])
    audio_som = SOM(20, 30, vect_size,
                    n_iterations=100,
                    checkpoint_dir=somu_path)
    audio_som.train(xs)
    # Hand both checkpoint locations to the joint test routine.
    iterativeTraining(somv_path, somu_path)
# NOTE(review): this is the tail of a command-line script; `parser` and the
# arguments read below as args.classes100 and args.csv_path are defined above
# this chunk. Indentation was lost in this view and has been reconstructed --
# verify the nesting against the real file.
parser.add_argument(
    '--is-audio',
    action='store_true',
    default=False,
    help=
    'Specify whether the csv contains audio representations, as the loading functions are different.'
)
# When set, cluster_compactness is run instead of the prototype distance
# matrix (see the dispatch at the bottom).
parser.add_argument('--cluster', action='store_true', default=False)
args = parser.parse_args()

# Choose the loader from the class count and the modality.
if not args.classes100:
    num_classes = 10
    if not args.is_audio:
        xs, ys = from_csv_visual_10classes(args.csv_path)
    else:
        # The audio loader also returns a third value, which is discarded.
        xs, ys, _ = from_csv_with_filenames(args.csv_path)
    # Convert every label to int and shift it down by 1000.
    # NOTE(review): with indentation lost it is unclear whether this shift
    # applies to both modalities or only to the audio branch -- confirm.
    ys = [int(y) - 1000 for y in ys] # see comment in average_prototype_distance_matrix
else:
    num_classes = 100
    if not args.is_audio:
        xs, ys = from_csv_visual_100classes(args.csv_path)
    else:
        # NOTE(review): no label shift is applied in the 100-class case --
        # confirm that is intended.
        xs, ys, _ = from_csv_with_filenames(args.csv_path)
if not args.cluster:
    average_prototype_distance_matrix(xs, ys)
else:
    cluster_compactness(xs, ys, num_classes)