import torch

# load_X and gkm are project helpers defined elsewhere (CSV loading and the
# gapped k-mer kernel, respectively).


def compute_kernel(k, l=None):
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print("Computing kernel")
    # 3 datasets, each contributing 2000 training rows (offset t) and 1000 test rows (offset v).
    K = torch.zeros((9000, 9000), device=device, dtype=torch.float32)
    for p in range(3):
        t, v = p * 2000, 6000 + p * 1000
        X_paths = [f"data/Xt{split}{p}.csv" for split in ("r", "e")]  # train ("r") and test ("e") files
        X = load_X(X_paths)
        X = torch.tensor(X, dtype=torch.float32, device=device)
        n, d = X.shape
        print(f"on data {p}...")
        if l is not None:
            # Gapped k-mer kernel over every pair of length-l windows.
            for i in range(d - l):
                for j in range(d - l):
                    X_i, X_j = X[:, i:i + l], X[:, j:j + l]
                    _K = gkm(X_i, X_j, k, l)
                    K[t:t + 2000, t:t + 2000] += _K[:2000, :2000]
                    K[v:v + 1000, t:t + 2000] += _K[2000:, :2000]
                    K[t:t + 2000, v:v + 1000] += _K[:2000, 2000:]
                    K[v:v + 1000, v:v + 1000] += _K[2000:, 2000:]
        else:
            # Spectrum kernel: count exact matches between length-k windows, ignoring zero padding.
            for i in range(d - k):
                for j in range(d - k):
                    X_i, X_j = X[:, i:i + k], X[:, j:j + k]
                    _K = torch.all(X_i[None] == X_j[:, None], dim=-1) \
                        * torch.all(X_i[None] != 0, dim=-1) \
                        * torch.all(X_j[:, None] != 0, dim=-1)
                    K[t:t + 2000, t:t + 2000] += _K[:2000, :2000]
                    K[v:v + 1000, t:t + 2000] += _K[2000:, :2000]
                    K[t:t + 2000, v:v + 1000] += _K[:2000, 2000:]
                    K[v:v + 1000, v:v + 1000] += _K[2000:, 2000:]
    return K.cpu().numpy()
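# --- Hedged usage sketch (not part of the original source) ---
# compute_kernel returns the full 9000 x 9000 Gram matrix as a NumPy array; with
# l=None it reduces to a plain k-mer matching (spectrum-style) kernel. A minimal
# call, assuming the data/Xtr*.csv and data/Xte*.csv files referenced above exist:
import numpy as np

K = compute_kernel(k=6)
np.save("K6.npy", K)  # same file-naming convention as the saving code in main() below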
def main(model, test_dir):
    X_test_signals_paths = [os.path.join(test_dir, axi) for axi in axis]
    X_test = load_X(X_test_signals_paths)
    y_test_path = os.path.join(test_dir, 'classification')
    y_test = load_Y(y_test_path)

    with tf.Session() as sess:
        # Restore the graph and weights from the saved checkpoint.
        saver = tf.train.import_meta_graph(model + '.meta')
        saver.restore(sess, tf.train.latest_checkpoint('./'))
        graph = tf.get_default_graph()
        x = graph.get_tensor_by_name('x:0')
        y = graph.get_tensor_by_name('y:0')
        pred = graph.get_tensor_by_name('pred:0')

        argmax = tf.argmax(pred, 1)
        correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        # argmax gives the predicted class per example; correct_pred marks whether it
        # matches the one-hot label.
        predictions, correct = sess.run([argmax, correct_pred],
                                        feed_dict={x: X_test, y: one_hot(y_test)})
        for r in zip(predictions, correct):
            print(r)
import numpy as np
import torch

# load_X and gkm are project helpers defined elsewhere.


def main():
    device = "cuda:0" if torch.cuda.is_available() else "cpu"

    # Load training and test sequences for all three datasets to size the Gram matrix.
    X_paths = [f'data/Xtr{i}.csv' for i in range(3)]
    X = load_X(X_paths)
    X2_paths = [f'data/Xte{i}.csv' for i in range(3)]
    X2 = load_X(X2_paths)
    X = np.concatenate((X, X2))
    X = torch.tensor(X, dtype=torch.float32, device=device)
    n, d = X.shape
    K = torch.zeros((n, n), device=device, dtype=torch.float32)

    k = 6
    l = None
    for p in range(3):
        t, v = p * 2000, 6000 + p * 1000
        X_paths = [f"data/Xt{split}{p}.csv" for split in ("r", "e")]  # train ("r") and test ("e") files
        X = load_X(X_paths)
        X = torch.tensor(X, dtype=torch.float32, device=device)
        n, d = X.shape
        print(f"on data {p}...")
        for i in range(d - k):
            for j in range(d - k):
                X_i, X_j = X[:, i:i + k], X[:, j:j + k]
                if l is not None:
                    _K = gkm(X_i, X_j, k, l)
                else:
                    _K = torch.all(X_i[None] == X_j[:, None], dim=-1) \
                        * torch.all(X_i[None] != 0, dim=-1) \
                        * torch.all(X_j[:, None] != 0, dim=-1)
                K[t:t + 2000, t:t + 2000] += _K[:2000, :2000]
                K[v:v + 1000, t:t + 2000] += _K[2000:, :2000]
                K[t:t + 2000, v:v + 1000] += _K[:2000, 2000:]
                K[v:v + 1000, v:v + 1000] += _K[2000:, 2000:]
            if i % 10 == 0:
                print(i)

    name = f"K{k}.npy"
    if l is not None:
        name = f"K{k}-{l}.npy"
    print(f"Saving to {name}.")
    np.save(name, K.cpu().numpy())
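# --- Hedged downstream sketch (not part of the original source) ---
# The saved kernel can be reloaded and sliced into its train/test blocks, following
# the offsets used above (training rows of dataset p start at p * 2000, test rows at
# 6000 + p * 1000):
import numpy as np

K = np.load("K6.npy")            # file name assumes k = 6 and l = None, as above
K_train = K[:6000, :6000]        # train x train block (3 datasets x 2000 rows)
K_test_train = K[6000:, :6000]   # test x train block (3 datasets x 1000 rows)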
import numpy as np
from utils import load_X, load_y, mix, standardize, add_intercept, evaluate, evaluate1d
import matplotlib.pyplot as plt
import theano
from theano import tensor as T

PURCENT = 5  # Percentage of the data to hold out for the test set
NUM_FRAMES = 60
DATADIR = '/baie/corpus/emoMusic/train/'
# DATADIR = './train/'

do_regularize = False

y_, song_id, nb_of_songs = load_y(DATADIR)
X_ = load_X(DATADIR, song_id)

# Now let's mix everything so that we can build the test set and train set independently.
# We need to separate PER SONG.
X_train, y_train, X_test, y_test, song_id_tst = mix(X_, y_, PURCENT, NUM_FRAMES, song_id, nb_of_songs)
print X_train.shape, y_train.shape, X_test.shape, y_test.shape
# print X_train[0:3,0:3]

# standardize data
X_train, scaler = standardize(X_train)
X_test, _ = standardize(X_test, scaler)

X_train = X_train[:, [
    10, 12, 13, 17, 19, 82, 83, 84, 85, 89, 90, 91, 103, 140, 142, 146, 148,
def main():
    # Parse arguments
    args = parse_args()

    # Load tower model
    with CustomObjectScope({'_euclidean_distance': nn_model.Model._euclidean_distance}):
        model = load_model(args.model_path)
        model.compile(optimizer='adam', loss='mean_squared_error')  # Model was previously not compiled

    X_shape, y_shape = utils.get_shapes(args.triplets_path, "train_anchors")

    # Build model to compute [A, P, N] => [abs(emb(A) - emb(P)), abs(emb(A) - emb(N))]
    pair_distance_model = build_pair_distance_model(model, X_shape[1:])
    pair_distance_model.compile(optimizer="adam", loss="mean_squared_error")  # Need to compile in order to predict

    # Load test data
    _, y_test_shape = utils.get_shapes(args.triplets_path, "test")
    n_users = y_test_shape[1]
    X_test_separated = []
    for j in range(n_users):
        X_test_j = utils.load_X(args.triplets_path, "test_" + str(j))
        X_test_separated.append(X_test_j)

    # If no SVM model is supplied, train a new one
    if args.load_model_path is None:
        # Load training triplets and validation triplets
        X_train_anchors, _ = utils.load_examples(args.triplets_path, "train_anchors")
        X_train_positives, _ = utils.load_examples(args.triplets_path, "train_positives")
        X_train_negatives, _ = utils.load_examples(args.triplets_path, "train_negatives")

        # Get abs(distance) of embeddings
        X_train_ap, X_train_an, X_train_pn = pair_distance_model.predict(
            [X_train_anchors, X_train_positives, X_train_negatives])

        # Stack positive and negative examples
        X_train = np.vstack((X_train_ap, X_train_an, X_train_pn))
        y_train = np.hstack((np.ones(X_train_ap.shape[0]),
                             np.zeros(X_train_an.shape[0] + X_train_pn.shape[0])))

        # Sign of distances should not matter -> train on both
        X_train = np.vstack((X_train, -X_train))
        y_train = np.hstack((y_train, y_train))

        # Shuffle the data
        X_train, y_train = shuffle(X_train, y_train)

        # Train SVM
        svm_model = svm.SVC(C=C, gamma=GAMMA, kernel=KERNEL, class_weight=CLASS_WEIGHTS,
                            verbose=True, probability=args.sweep)
        svm_model.fit(X_train[:20000, :], y_train[:20000])
    else:
        # SVM model supplied: load it from disk
        with open(args.load_model_path, "rb") as svm_file:
            svm_model = pickle.load(svm_file)

    random.seed(1)
    accuracy, FAR, FRR = predict_and_evaluate(pair_distance_model, svm_model, X_test_separated,
                                              args.ensemble_size, args.ensemble_type,
                                              threshold=0.5, probability=False)

    print("\n---- Test Results ----")
    print("Accuracy = {}".format(accuracy))
    print("FAR = {}".format(FAR))
    print("FRR = {}".format(FRR))

    if args.sweep:
        # Sweep the threshold
        FARs, FRRs = [], []
        min_diff = float("inf")
        FAR_EER, FRR_EER = 1, 1
        accuracy_EER = 0
        threshold_EER = None
        for threshold in np.arange(0, 1, 0.01):
            # Predict and evaluate
            accuracy, FAR, FRR = predict_and_evaluate(pair_distance_model, svm_model, X_test_separated,
                                                      args.ensemble_size, args.ensemble_type,
                                                      threshold=threshold, probability=True)
            # Store results
            FARs.append(FAR)
            FRRs.append(FRR)
            if np.abs(FAR - FRR) < min_diff:
                FAR_EER = FAR
                FRR_EER = FRR
                accuracy_EER = accuracy
                threshold_EER = threshold
                min_diff = np.abs(FAR - FRR)

        # Report EER and corresponding accuracy
        print("\n ---- Test Results: EER ----")
        print("Accuracy = {}".format(accuracy_EER))
        print("FAR = {}".format(FAR_EER))
        print("FRR = {}".format(FRR_EER))
        print("Threshold EER = {}".format(threshold_EER))

        # Plot FRR vs FAR
        plt.figure()
        plt.scatter(FARs, FRRs)
        plt.xlabel("FAR")
        plt.ylabel("FRR")
        plt.savefig("FRR_FAR.pdf")
    else:
        # No sweep
        random.seed(1)
        accuracy, FAR, FRR = predict_and_evaluate(pair_distance_model, svm_model, X_test_separated,
                                                  args.ensemble_size, args.ensemble_type,
                                                  threshold=0.5, probability=False)
print("\n---- Test Results ----") print("Accuracy = {}".format(accuracy)) print("FAR = {}".format(FAR)) print("FRR = {}".format(FRR)) # Save svm model if args.save_model_path is not None: with open(args.save_model_path + "svm_model.pkl", "wb") as svm_file: pickle.dump(svm_model, svm_file)
import numpy as np
from utils import load_X, load_y, mix, standardize, add_intercept, evaluate, evaluate1d
import matplotlib.pyplot as plt
import theano
from theano import tensor as T

PURCENT = 5  # Percentage of the data to hold out for the test set
NUM_FRAMES = 60
DATADIR = "/baie/corpus/emoMusic/train/"
# DATADIR = './train/'

do_regularize = False

y_, song_id, nb_of_songs = load_y(DATADIR)
X_ = load_X(DATADIR, song_id)

# Now let's mix everything so that we can build the test set and train set independently.
# We need to separate PER SONG.
X_train, y_train, X_test, y_test, song_id_tst = mix(X_, y_, PURCENT, NUM_FRAMES, song_id, nb_of_songs)
print X_train.shape, y_train.shape, X_test.shape, y_test.shape
# print X_train[0:3,0:3]
# print np.mean(X_train[:,0:3], axis=0), np.std(X_train[:,0:3], axis=0)
# print np.mean(X_test[:,0:3], axis=0), np.std(X_test[:,0:3], axis=0)

# with(open('train_dummy.txt', mode='w')) as infile:
#     for i in range(X_train.shape[0]):
#         s = ''
#         for feat in range(3):
#             s = s + '%g ' % X_train[i, feat]
#         infile.write('%s\n' % s)
        # (Tail of a batch-extraction helper; the function header and its loop over i
        # are not part of this fragment.)
        index = ((step - 1) * batch_size + i) % len(_train)
        batch_s[i] = _train[index]

    return batch_s


X_train_signals_paths = [
    os.path.join(input_dir, 'train', axi) for axi in axis
]
X_test_signals_paths = [
    os.path.join(input_dir, 'test', axi) for axi in axis
]

X_train = load_X(X_train_signals_paths)
X_test = load_X(X_test_signals_paths)

y_train_path = os.path.join(input_dir, 'train', 'classification')
y_test_path = os.path.join(input_dir, 'test', 'classification')

y_train = load_Y(y_train_path)
y_test = load_Y(y_test_path)

# Input data
training_data_count = len(X_train)  # 7352 training series (with 50% overlap between each series)
test_data_count = len(X_test)       # 2947 test series
n_steps = len(X_train[0])           # 128 timesteps per series
n_input = len(X_train[0][0])        # 9 input parameters per timestep
__author__ = 'giulio'

import utils as ut
import features_selection as fs
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import scale

if __name__ == "__main__":
    X, y = ut.load_X(), ut.load_y()
    print X.shape, y

    X = scale(X)

    clf = GradientBoostingClassifier()

    X = X[:, :11]

    fs.exaustive_selection(clf, X, y, fold=StratifiedKFold(y, n_folds=5))
__author__ = 'giulio'

from sklearn.ensemble import GradientBoostingClassifier, ExtraTreesClassifier
import utils as ut
import numpy as np
import os
from sklearn.svm import SVC
from sklearn.preprocessing import scale, MinMaxScaler

reload(ut)

X_train, y_train = ut.load_X(), ut.load_y()
X_test = ut.load_X_test()

X = np.vstack((X_train, X_test))
X = scale(X)

X_train = X[:X_train.shape[0]]
X_test = X[X_train.shape[0]:]

# mms = MinMaxScaler()
# X = mms.fit_transform(X)

clf1 = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
           kernel='rbf', max_iter=-1, probability=True, random_state=None,
           shrinking=True, tol=0.001, verbose=False)

clf2 = GradientBoostingClassifier(init=None, learning_rate=0.1, loss='deviance',
                                  max_depth=3, max_features=None, max_leaf_nodes=None,
                                  min_samples_leaf=1, min_samples_split=2, n_estimators=100,
def get_subset(label):
    feature = load_X(
        '/home/t-yud/ZSL/data/ImageNet/res101_1crop_feature/{}.bin'.format(label))
    feature = torch.from_numpy(feature).float().cuda()
    return feature
def main(model, test_dir):
    X_test_signals_paths = [os.path.join(test_dir, axi) for axi in axis]
    Y_test_path = os.path.join(test_dir, 'classification')
    X_test = load_X(X_test_signals_paths)
    Y_test = load_Y(Y_test_path)

    n_hidden = 32
    n_classes = 5
    lambda_loss_amount = 0.0015
    learning_rate = 0.0025
    n_steps = len(X_test[0])
    n_input = len(X_test[0][0])
    print('n_steps: {} n_input: {}'.format(n_steps, n_input))

    x = tf.placeholder(tf.float32, [None, n_steps, n_input])
    y = tf.placeholder(tf.float32, [None, n_classes])

    weights = {
        'hidden': tf.Variable(tf.random_normal([n_input, n_hidden])),  # Hidden layer weights
        'out': tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0))
    }
    biases = {
        'hidden': tf.Variable(tf.random_normal([n_hidden])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    pred = LSTM_RNN(n_input, n_steps, n_hidden, x, weights, biases)

    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    l2 = lambda_loss_amount * sum(
        tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables()
    )  # L2 loss keeps this over-parameterized network from overfitting the data
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred)) + l2  # Softmax loss
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)  # Adam optimizer (unused at test time)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, model)
        print('Model restored')

        loss, acc = sess.run([cost, accuracy],
                             feed_dict={x: X_test, y: one_hot(Y_test)})
        print("PERFORMANCE ON TEST SET: " +
              "Batch Loss = {}".format(loss) +
              ", Accuracy = {}".format(acc))
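# --- Hedged entry-point sketch (not part of the original source) ---
# The evaluation above only needs a checkpoint path and a test directory; the
# argument names below are assumptions for illustration.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("model", help="path to the saved TensorFlow checkpoint (prefix passed to saver.restore)")
    parser.add_argument("test_dir", help="directory containing the test signal files and 'classification' labels")
    args = parser.parse_args()
    main(args.model, args.test_dir)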