# NOTE(review): whitespace-mangled fragment, truncated mid-call -- the trailing
# `HebbianModel(som_a, som_v, a_dim=a_dim,` has no closing paren, so the rest of
# this experiment loop is missing from this view. Left byte-identical; recover
# the original formatting/continuation from version control before editing.
acc = [] for i in range(20): n_classes = 4 dataset = OneHotDataset(n_classes) a_xs = dataset.x a_ys = dataset.y v_xs = dataset.x v_ys = dataset.y # scale audio data to 0-1 range a_xs = MinMaxScaler().fit_transform(a_xs) v_xs = MinMaxScaler().fit_transform(v_xs) a_dim = len(a_xs[0]) v_dim = len(v_xs[0]) som_a = SOM(5, 5, a_dim, checkpoint_dir=soma_path, n_iterations=100, batch_size=4) som_v = SOM(5, 5, v_dim, checkpoint_dir=somv_path, n_iterations=100, batch_size=4) som_a.train(a_xs) som_v.train(v_xs) som_a.memorize_examples_by_class(a_xs, a_ys) som_v.memorize_examples_by_class(v_xs, v_ys) hebbian_model = HebbianModel(som_a, som_v, a_dim=a_dim,
# NOTE(review): whitespace-mangled fragment, truncated mid-call -- the trailing
# `som_a.train(a_xs_train,` has no closing paren, so the training/evaluation
# tail of this loop is missing from this view. Left byte-identical; recover the
# original formatting/continuation from version control before editing.
for i in range(0, 1): a_xs_train, a_ys_train, a_xs_test, a_ys_test = get_random_classes( a_xs, a_ys, classes, 10, 2) v_xs_train, v_ys_train, v_xs_test, v_ys_test = get_random_classes( v_xs, v_ys, classes, 10, 2) # print('shape audio input train', np.shape(a_xs_train)) # print('shape audio labels train', np.shape(a_ys_train)) # print('shape visual input train', np.shape(v_xs_train)) # print('shape visual labels train', np.shape(v_ys_train)) a_dim = len(a_xs[0]) v_dim = len(v_xs[0]) som_a = SOM(20, 30, a_dim, alpha=0.9, sigma=30, n_iterations=100, tau=0.1, threshold=0.6, batch_size=1) type_file = 'visual_' + str(i + 1) som_v = SOM(20, 30, v_dim, alpha=0.7, sigma=15, n_iterations=100, threshold=0.6, batch_size=1, data=type_file) som_a.train(a_xs_train,
# Restore two pre-trained SOMs and sweep the number of Hebbian presentations.
if __name__ == '__main__':
    a_xs, a_ys = from_csv(audio_data_path)
    v_xs, v_ys = from_csv_visual_10classes(visual_data_path)
    # fix labels to 0-9 range
    a_ys = [int(y) - 1000 for y in a_ys]
    v_ys = [int(y) - 1000 for y in v_ys]
    # scale data to 0-1 range
    a_xs = MinMaxScaler().fit_transform(a_xs)
    v_xs = MinMaxScaler().fit_transform(v_xs)
    # create fake examples for audio
    a_xs, a_ys = create_dummy_audio_examples(a_xs, v_xs, a_ys, v_ys)
    a_dim = len(a_xs[0])
    v_dim = len(v_xs[0])
    som_a = SOM(20, 30, a_dim, checkpoint_dir=soma_path, n_iterations=200)
    som_v = SOM(20, 30, v_dim, checkpoint_dir=somv_path, n_iterations=200)
    som_a.restore_trained()
    som_v.restore_trained()
    for n in range(1, 15):
        hebbian_model = HebbianModel(som_a, som_v, a_dim=a_dim, v_dim=v_dim,
                                     n_presentations=n,
                                     checkpoint_dir=hebbian_path,
                                     tau=0.1, learning_rate=100)
        # create em folds
        v_ys = np.array(v_ys)
        v_xs = np.array(v_xs)
        # NOTE(review): the loop body appears to continue past this fragment
        # (the fold creation announced above is not visible here).
from utils.utils import load_data
from utils.utils import from_csv_with_filenames
from utils.constants import Constants
from sklearn.externals import joblib
import os
import logging
import numpy as np

# NOTE(review): SOM and showSom are used below but their imports are not
# visible in this fragment -- confirm `from models.som.SOM import SOM` and
# `from models.som.SOMTest import showSom` appear above this point.

csv_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'audio_data.csv')
# When True, restore a previously trained SOM instead of training a new one.
LOAD = True

if __name__ == '__main__':
    logging.info('Loading data')
    xs, ys, filenames = from_csv_with_filenames(csv_path)
    # labels come in offset by 1000; shift them to the 0-9 range
    ys = [int(y) - 1000 for y in ys]
    vect_size = len(xs[0])
    audio_som = SOM(20, 30, vect_size, n_iterations=100,
                    checkpoint_dir=os.path.join(Constants.DATA_FOLDER,
                                                '10classes',
                                                'audio_model', ''))
    if not LOAD:
        # BUG FIX: the 'Training som' log line was previously emitted in the
        # restore branch; it belongs with the actual training call.
        logging.info('Training som')
        audio_som.train(xs)
    else:
        audio_som.restore_trained()
    #audio_som.plot_som(xs, ys, plot_name='audio_som.png')
    showSom(audio_som, xs, ys, 1, 'Audio Map', filenames=filenames)
from models.som.SOM import SOM
from models.som.SOMTest import showSom
import numpy as np
from utils.constants import Constants
from utils.utils import from_csv_visual
from sklearn.preprocessing import MinMaxScaler
import os
import logging

visual_data_path = os.path.join(Constants.DATA_FOLDER, '10classes',
                                'VisualInputTrainingSet.csv')
N = 1000
lenExample = 2048

if __name__ == '__main__':
    # Load the visual examples, squash every feature into the 0-1 range,
    # restore the pre-trained visual SOM and display the resulting map.
    v_xs, v_ys = from_csv_visual(visual_data_path)
    v_xs = MinMaxScaler().fit_transform(v_xs)
    som = SOM(20, 30, lenExample,
              checkpoint_dir=os.path.join(Constants.DATA_FOLDER,
                                          'visual_model_mine', ''),
              n_iterations=100, sigma=4.0)
    som.restore_trained()
    showSom(som, v_xs, v_ys, 1, 'Visual map')
# Repeatedly train a pair of small SOMs on a one-hot dataset, couple them
# with a Hebbian model and record the cross-modal retrieval accuracy.
if __name__ == '__main__':
    acc = []
    for i in range(20):
        n_classes = 4
        dataset = OneHotDataset(n_classes)
        a_xs = dataset.x
        a_ys = dataset.y
        v_xs = dataset.x
        v_ys = dataset.y
        # scale audio data to 0-1 range
        a_xs = MinMaxScaler().fit_transform(a_xs)
        v_xs = MinMaxScaler().fit_transform(v_xs)
        a_dim = len(a_xs[0])
        v_dim = len(v_xs[0])
        som_a = SOM(5, 5, a_dim, n_iterations=100, batch_size=4)
        som_v = SOM(5, 5, v_dim, n_iterations=100, batch_size=4)
        # BUG FIX: the audio SOM was trained with the *visual* labels and the
        # visual SOM with the raw visual *inputs* as its classes; each SOM is
        # now trained with its own label vector.
        som_a.train(a_xs, input_classes=a_ys)
        som_v.train(v_xs, input_classes=v_ys)
        som_a.memorize_examples_by_class(a_xs, a_ys)
        som_v.memorize_examples_by_class(v_xs, v_ys)
        hebbian_model = HebbianModel(som_a, som_v, a_dim=a_dim, v_dim=v_dim,
                                     n_presentations=1, learning_rate=1,
                                     n_classes=n_classes,
                                     checkpoint_dir=hebbian_path)
        print('Training...')
        hebbian_model.train(a_xs, v_xs)
        print('Evaluating...')
        accuracy = hebbian_model.evaluate(a_xs, v_xs, a_ys, v_ys, source='a',
                                          prediction_alg='regular')
        hebbian_model.make_plot(a_xs[0], v_xs[0], v_ys[0], v_xs, source='a')
        acc.append(accuracy)
        print('n={}, accuracy={}'.format(1, accuracy))
from utils.constants import Constants
from utils.utils import from_csv_with_filenames, from_csv_visual
import os

# NOTE(review): SOM and HebbianModel are referenced below but their imports
# are not visible in this fragment -- confirm they are imported above.

soma_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'audio_model', '')
somv_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'visual_model', '')
hebbian_path = os.path.join(Constants.DATA_FOLDER, '10classes',
                            'hebbian_model', '')
audio_data_path = os.path.join(Constants.DATA_FOLDER, '10classes',
                               'audio_data.csv')
visual_data_path = os.path.join(Constants.DATA_FOLDER, '10classes',
                                'VisualInputTrainingSet.csv')

if __name__ == '__main__':
    # Restore the two pre-trained SOMs and couple them through a Hebbian
    # model trained on the first ten audio/visual pairs.
    a_xs, a_ys, filenames = from_csv_with_filenames(audio_data_path)
    v_xs, v_ys = from_csv_visual(visual_data_path)
    a_dim = len(a_xs[0])
    v_dim = len(v_xs[0])
    som_a = SOM(20, 30, a_dim, checkpoint_dir=soma_path)
    som_v = SOM(20, 30, v_dim, checkpoint_dir=somv_path)
    som_a.restore_trained()
    som_v.restore_trained()
    hebbian_model = HebbianModel(som_a, som_v, a_dim=a_dim, v_dim=v_dim,
                                 n_presentations=10,
                                 checkpoint_dir=hebbian_path)
    hebbian_model.train(a_xs[:10], v_xs[:10])
# CLI tail of a SOM training script: `parser` is created above this fragment.
parser.add_argument('--alpha', metavar='alpha', type=float, default=0.0001,
                    help='The SOM initial learning rate')
parser.add_argument('--epochs', type=int, default=10000,
                    help='Number of epochs the SOM will be trained for')
args = parser.parse_args()

# pick the loader that matches the requested modality
if args.data == 'audio':
    xs, ys, _ = from_csv_with_filenames(audio_data_path)
elif args.data == 'video':
    xs, ys = from_csv_visual_100classes(visual_data_path)
else:
    raise ValueError('--data argument not recognized')

dim = len(xs[0])
som = SOM(args.neurons1, args.neurons2, dim, n_iterations=args.epochs,
          alpha=args.alpha, checkpoint_loc=args.path, tau=0.1, threshold=0.6,
          batch_size=args.batch, data=args.data, sigma=args.sigma)

ys = np.array(ys)
xs = np.array(xs)
if args.subsample:
    xs, _, ys, _ = train_test_split(xs, ys, test_size=0.6, stratify=ys,
                                    random_state=args.seed)
# 20% held out for test, then the remainder halved into train/validation
xs_train, xs_test, ys_train, ys_test = train_test_split(
    xs, ys, test_size=0.2, stratify=ys, random_state=args.seed)
xs_train, xs_val, ys_train, ys_val = train_test_split(
    xs_train, ys_train, test_size=0.5, stratify=ys_train,
    random_state=args.seed)
# NOTE(review): xs_test is overwritten with a transform of xs_val here --
# looks like it should read transform_data(xs_train, xs_test, ...); confirm
# against transform_data's contract before changing.
xs_train, xs_test = transform_data(xs_train, xs_val, rotation=args.rotation)
# Load and normalize the audio/visual pairs, then build the two SOMs.
if __name__ == '__main__':
    a_xs, a_ys, _ = from_csv_with_filenames(audio_data_path)
    v_xs, v_ys = from_csv_visual(visual_data_path)
    # fix labels to 0-9 range
    a_ys = [int(y) - 1000 for y in a_ys]
    v_ys = [int(y) - 1000 for y in v_ys]
    # scale data to 0-1 range
    a_xs = MinMaxScaler().fit_transform(a_xs)
    v_xs = MinMaxScaler().fit_transform(v_xs)
    a_dim = len(a_xs[0])
    v_dim = len(v_xs[0])
    som_a = SOM(20, 30, a_dim, checkpoint_dir=soma_path, n_iterations=200,
                tau=0.1, threshold=0.6)
    som_v = SOM(20, 30, v_dim, checkpoint_dir=somv_path, n_iterations=200,
                tau=0.1, threshold=0.6)
    # work with ndarrays from here on
    v_ys = np.array(v_ys)
    v_xs = np.array(v_xs)
    a_xs = np.array(a_xs)
    a_ys = np.array(a_ys)
from models.som.SOM import SOM
from models.som.wordLearningTest import iterativeTraining
from utils.constants import Constants
from utils.utils import from_csv_with_filenames
import os

"""
Train an auditive som, test it alongside the visual one
"""

somv_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'visual_model')
somu_path = os.path.join(Constants.DATA_FOLDER, '10classes', 'audio_model')
audio_data_path = os.path.join(Constants.DATA_FOLDER, '10classes',
                               'audio_data_40t.csv')

if __name__ == '__main__':
    # train a fresh audio SOM, then run the joint audio/visual test routine
    xs, ys, filenames = from_csv_with_filenames(audio_data_path)
    vect_size = len(xs[0])
    audio_som = SOM(20, 30, vect_size, n_iterations=100,
                    checkpoint_dir=somu_path)
    audio_som.train(xs)
    iterativeTraining(somv_path, somu_path)
# CLI tail of a SOM training script: `parser` is created above this fragment.
parser.add_argument('--batch', type=int, default=128)
args = parser.parse_args()

# pick the loader that matches the requested modality
if args.data == 'audio':
    xs, ys, _ = from_csv_with_filenames(audio_data_path)
elif args.data == 'video':
    xs, ys = from_csv_visual_100classes(visual_data_path)
else:
    raise ValueError('--data argument not recognized')

dim = len(xs[0])
som = SOM(args.neurons1, args.neurons2, dim, n_iterations=args.epochs,
          alpha=args.alpha, tau=0.1, threshold=0.6, batch_size=args.batch,
          data=args.data, sigma=args.sigma, num_classes=args.classes,
          sigma_decay='constant')

ys = np.array(ys)
xs = np.array(xs)
if args.subsample:
    xs, _, ys, _ = train_test_split(xs, ys, test_size=0.6, stratify=ys,
                                    random_state=args.seed)
print('Training on {} examples.'.format(len(xs)))
# 20% held out for test, then the remainder halved into train/validation
xs_train, xs_test, ys_train, ys_test = train_test_split(
    xs, ys, test_size=0.2, stratify=ys, random_state=args.seed)
xs_train, xs_val, ys_train, ys_val = train_test_split(
    xs_train, ys_train, test_size=0.5, stratify=ys_train,
    random_state=args.seed)
# CLI tail of an experiment script: `parser` is created above this fragment.
parser.add_argument('--train', action='store_true', default=True)
args = parser.parse_args()
exp_description = 'lr' + str(
    args.lr) + '_algo_' + args.algo + '_source_' + args.source

a_xs, a_ys, _ = from_csv_with_filenames(audio_data_path)
v_xs, v_ys = from_csv_visual_10classes(visual_data_path)
# fix labels to 0-9 range
a_ys = [int(y) - 1000 for y in a_ys]
v_ys = [int(y) - 1000 for y in v_ys]
# scale data to 0-1 range
a_xs = MinMaxScaler().fit_transform(a_xs)
v_xs = MinMaxScaler().fit_transform(v_xs)
a_dim = len(a_xs[0])
v_dim = len(v_xs[0])
som_a = SOM(20, 30, a_dim, n_iterations=100, tau=0.1, threshold=0.6)
som_v = SOM(20, 30, v_dim, n_iterations=100, tau=0.1, threshold=0.6)

v_ys = np.array(v_ys)
v_xs = np.array(v_xs)
a_xs = np.array(a_xs)
a_ys = np.array(a_ys)

a_xs_train, a_xs_test, a_ys_train, a_ys_test = train_test_split(
    a_xs, a_ys, test_size=0.2)
v_xs_train, v_xs_test, v_ys_train, v_ys_test = train_test_split(
    v_xs, v_ys, test_size=0.2)
# BUG FIX: the dev split was drawn from the *full* dataset again, so dev
# examples overlapped the test set (data leakage); carve the dev split out of
# the training portion instead.
a_xs_train, a_xs_dev, a_ys_train, a_ys_dev = train_test_split(
    a_xs_train, a_ys_train, test_size=0.2)
v_xs_train, v_xs_dev, v_ys_train, v_ys_dev = train_test_split(
    v_xs_train, v_ys_train, test_size=0.2)
# Read the visual-input CSV (`fInput` and the pre-allocated `inputs` come from
# above this fragment), then restore-or-train a 20x30 SOM and display it.
nameInputs = list()
with open(fInput, 'r') as inp:
    i = 0
    for line in inp:
        if len(line) > 2:
            fields = line.split(',')
            # BUG FIX / compat: `np.float` was deprecated in NumPy 1.20 and
            # removed in 1.24; the builtin `float` is the documented
            # replacement and behaves identically here.
            inputs[i] = np.array(fields[1:]).astype(float)
            # class name taken from the 7th path component of the file path
            # NOTE(review): hard-coded index -- breaks if the path depth
            # changes; confirm against the dataset layout.
            nameInputs.append(fields[0].split('/')[6])
            i = i + 1
print(nameInputs[0])

#get the 20x30 SOM or train a new one (if the folder does not contain the model)
som = SOM(20, 30, lenExample,
          checkpoint_dir=os.path.join(Constants.DATA_FOLDER,
                                      'VisualModel10classes/'),
          n_iterations=20, sigma=4.0)
loaded = som.restore_trained()
if not loaded:
    logging.info('Training SOM')
    som.train(inputs)

# keep only the class prefix of each name (e.g. 'dog_123' -> 'dog')
nameInputs = [name.split('_')[0] for name in nameInputs]

#shows the SOM
showSom(som, inputs, nameInputs, 1, 'Visual map')
# CLI tail of the class-compactness measurement script: `parser` is created
# above this fragment.
parser.add_argument(
    '--is-audio',
    action='store_true',
    default=False,
    help=
    'Specify whether the csv contains audio representations, as the loading functions are different.'
)
args = parser.parse_args()

# select the loader by class count and modality
if not args.classes100:
    num_classes = 10
    if not args.is_audio:
        xs, ys = from_csv_visual_10classes(args.csv_path)
    else:
        xs, ys, _ = from_csv_with_filenames(args.csv_path)
        # see comment in average_prototype_distance_matrix
        ys = [int(y) - 1000 for y in ys]
else:
    num_classes = 100
    if not args.is_audio:
        xs, ys = from_csv_visual_100classes(args.csv_path)
    else:
        xs, ys, _ = from_csv_with_filenames(args.csv_path)

som = SOM(20, 30, len(xs[0]), checkpoint_dir=args.model_path)
som.restore_trained()
measure = class_compactness(som, xs, ys)
print('Class Compactness: {}.'.format(measure))
print('Avg Compactness: {}\n Variance: {}'.format(np.mean(measure),
                                                  np.var(measure)))