Example #1
def compute_kernel(k, l=None):
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print("Computing kernel")
    K = torch.zeros((9000, 9000), device=device, dtype=torch.float32)  # combined kernel: 3 x 2000 train + 3 x 1000 test rows
    for p in range(3):
        t, v = p * 2000, 6000 + p * 1000  # row offsets of the p-th train and test blocks
        X_paths = [f"data/Xt{s}{p}.csv" for s in ["r", "e"]]  # train ("r") and test ("e") files
        X = load_X(X_paths)
        X = torch.tensor(X, dtype=torch.float32, device=device)
        n, d = X.shape
        print(f"on data {p}...")
        if l is not None:
            for i in range(d - l):
                for j in range(d - l):
                    X_i, X_j = X[:, i:i + l], X[:, j:j + l]
                    _K = gkm(X_i, X_j, k, l)
                    # Scatter the 3000x3000 block into the combined matrix:
                    # rows/cols 0-1999 of _K are training samples, 2000-2999 are test samples.
                    K[t:t + 2000, t:t + 2000] += _K[:2000, :2000]
                    K[v:v + 1000, t:t + 2000] += _K[2000:, :2000]
                    K[t:t + 2000, v:v + 1000] += _K[:2000, 2000:]
                    K[v:v + 1000, v:v + 1000] += _K[2000:, 2000:]
        else:
            for i in range(d - k):
                for j in range(d - k):
                    X_i, X_j = X[:, i:i + k], X[:, j:j + k]
                    # Exact k-mer match, masking out windows that contain zero (padding) entries.
                    _K = torch.all(X_i[None] == X_j[:, None], dim=-1) * \
                         torch.all(X_i[None] != 0, dim=-1) * torch.all(X_j[:, None] != 0, dim=-1)
                    K[t:t + 2000, t:t + 2000] += _K[:2000, :2000]
                    K[v:v + 1000, t:t + 2000] += _K[2000:, :2000]
                    K[t:t + 2000, v:v + 1000] += _K[:2000, 2000:]
                    K[v:v + 1000, v:v + 1000] += _K[2000:, 2000:]
    return K.cpu().numpy()
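
A minimal, hypothetical call site for compute_kernel (it assumes the data/Xtr*.csv and data/Xte*.csv files plus the load_X and gkm helpers referenced above are available):

import numpy as np

K = compute_kernel(k=6)               # exact k-mer matching branch (l=None)
# K = compute_kernel(k=6, l=8)        # alternative: the gkm() branch with window length l
np.save("K6.npy", K)                  # same file-name convention as Example #3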
Example #2
def main(model, test_dir):
    X_test_signals_paths = [os.path.join(test_dir, axi) for axi in axis]
    X_test = load_X(X_test_signals_paths)

    y_test_path = os.path.join(test_dir, 'classification')
    y_test = load_Y(y_test_path)

    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(model + '.meta')
        saver.restore(sess, tf.train.latest_checkpoint('./'))

        graph = tf.get_default_graph()

        x = graph.get_tensor_by_name('x:0')
        y = graph.get_tensor_by_name('y:0')
        pred = graph.get_tensor_by_name('pred:0')

        argmax = tf.argmax(pred, 1)
        correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        predictions, correct = sess.run([argmax, correct_pred],
                                        feed_dict={
                                            x: X_test,
                                            y: one_hot(y_test)
                                        })

        for r in zip(predictions, correct):
            print(r)
Example #3
def main():

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    X_paths = [f'data/Xtr{i}.csv' for i in range(3)]
    X = load_X(X_paths)

    X2_paths = [f'data/Xte{i}.csv' for i in range(3)]
    X2 = load_X(X2_paths)
    X = np.concatenate((X, X2))

    X = torch.tensor(X, dtype=torch.float32, device=device)
    n, d = X.shape
    K = torch.zeros((n, n), device=device, dtype=torch.float32)
    k = 6
    l = None

    for p in range(3):
        t, v = p * 2000, 6000 + p * 1000  # row offsets of the p-th train and test blocks
        X_paths = [f"data/Xt{s}{p}.csv" for s in ["r", "e"]]  # train ("r") and test ("e") files
        X = load_X(X_paths)
        X = torch.tensor(X, dtype=torch.float32, device=device)
        n, d = X.shape
        print(f"on data {p}...")
        for i in range(d - k):
            for j in range(d - k):
                X_i, X_j = X[:, i:i + k], X[:, j:j + k]
                if l is not None:
                    _K = gkm(X_i, X_j, k, l)
                    K[t:t + 2000, t:t + 2000] += _K[:2000, :2000]
                    K[v:v + 1000, t:t + 2000] += _K[2000:, :2000]
                    K[t:t + 2000, v:v + 1000] += _K[:2000, 2000:]
                    K[v:v + 1000, v:v + 1000] += _K[2000:, 2000:]
                else:
                    _K = torch.all(X_i[None] == X_j[:, None], dim=-1) * \
                         torch.all(X_i[None] != 0, dim=-1) * torch.all(X_j[:, None] != 0, dim=-1)
                    K[t:t + 2000, t:t + 2000] += _K[:2000, :2000]
                    K[v:v + 1000, t:t + 2000] += _K[2000:, :2000]
                    K[t:t + 2000, v:v + 1000] += _K[:2000, 2000:]
                    K[v:v + 1000, v:v + 1000] += _K[2000:, 2000:]
            if i % 10 == 0:
                print(i)

    name = f"K{k}.npy"
    if l is not None:
        name = f"K{k}-{l}.npy"
    print(f"Saving in {name}.")
    np.save(name, K.cpu().numpy())
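
Because the kernel rows follow a fixed layout (rows 0-5999 are the three training splits of 2000 samples each, rows 6000-8999 the three test splits of 1000 each), the saved matrix can be reloaded and sliced later. A minimal sketch, assuming the K6.npy file produced by the k=6, l=None run above:

import numpy as np

K = np.load("K6.npy")
K_train = K[:6000, :6000]   # train-vs-train block
K_test = K[6000:, :6000]    # test-vs-train block, used when predicting on the test sets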
Example #4
import numpy as np
from utils import load_X, load_y, mix, standardize, add_intercept, evaluate, evaluate1d
import matplotlib.pyplot as plt
import theano
from theano import tensor as T

PURCENT = 5  # Percentage of the set you want in the test set
NUM_FRAMES = 60
DATADIR = '/baie/corpus/emoMusic/train/'
# DATADIR = './train/'

do_regularize = False

y_, song_id, nb_of_songs = load_y(DATADIR)
X_ = load_X(DATADIR, song_id)

# Now let's mix everything so that we can take the test set and train set independently
# We need to separate PER SONG
X_train, y_train, X_test, y_test, song_id_tst = mix(X_, y_, PURCENT,
                                                    NUM_FRAMES, song_id,
                                                    nb_of_songs)
print X_train.shape, y_train.shape, X_test.shape, y_test.shape
# print X_train[0:3,0:3]

# standardize data
X_train, scaler = standardize(X_train)
X_test, _ = standardize(X_test, scaler)

X_train = X_train[:, [
    10, 12, 13, 17, 19, 82, 83, 84, 85, 89, 90, 91, 103, 140, 142, 146, 148,
Example #5
def main():

    # Parse arguments
    args = parse_args()

    # Load tower model
    with CustomObjectScope({'_euclidean_distance': nn_model.Model._euclidean_distance}):
        model = load_model(args.model_path)
        model.compile(optimizer='adam', loss='mean_squared_error')  # Model was previously not compiled

    X_shape, y_shape = utils.get_shapes(args.triplets_path, "train_anchors")

    # Build model to compute [A, P, N] => pairwise embedding distances [abs(emb(A) - emb(P)), abs(emb(A) - emb(N)), abs(emb(P) - emb(N))]
    pair_distance_model = build_pair_distance_model(model, X_shape[1:])
    pair_distance_model.compile(optimizer="adam", loss="mean_squared_error")  # Need to compile in order to predict

    # Load test data
    _, y_test_shape = utils.get_shapes(args.triplets_path, "test")
    n_users = y_test_shape[1]
    X_test_separated = []
    for j in range(n_users):
        X_test_j = utils.load_X(args.triplets_path, "test_" + str(j))
        X_test_separated.append(X_test_j)

    # If no SVM model was supplied, train a new one
    if args.load_model_path is None:

        # Load training triplets and validation triplets
        X_train_anchors, _ = utils.load_examples(args.triplets_path, "train_anchors")
        X_train_positives, _ = utils.load_examples(args.triplets_path, "train_positives")
        X_train_negatives, _ = utils.load_examples(args.triplets_path, "train_negatives")

        # Get abs(distance) of embeddings
        X_train_ap, X_train_an, X_train_pn = pair_distance_model.predict([X_train_anchors, X_train_positives, X_train_negatives])

        # Stack positive and negative examples
        X_train = np.vstack((X_train_ap, X_train_an, X_train_pn))
        y_train = np.hstack((np.ones(X_train_ap.shape[0], ), np.zeros(X_train_an.shape[0] + X_train_pn.shape[0],)))

        # Sign of distances should not matter ->  Train on both
        X_train = np.vstack((X_train, -X_train))
        y_train = np.hstack((y_train, y_train))

        # Shuffle the data
        X_train, y_train = shuffle(X_train, y_train)

        # Train SVM
        svm_model = svm.SVC(C=C, gamma=GAMMA, kernel=KERNEL, class_weight=CLASS_WEIGHTS, verbose=True, probability=args.sweep)
        svm_model.fit(X_train[:20000, :], y_train[:20000])

    else:  # if svm model supplied

        with open(args.load_model_path, "rb") as svm_file:
            svm_model = pickle.load(svm_file)

        random.seed(1)
        accuracy, FAR, FRR = predict_and_evaluate(pair_distance_model, svm_model, X_test_separated,
                                                  args.ensemble_size, args.ensemble_type, threshold=0.5, probability=False)

        print("\n---- Test Results ----")
        print("Accuracy = {}".format(accuracy))
        print("FAR = {}".format(FAR))
        print("FRR = {}".format(FRR))

    if args.sweep:

        # Sweep the threshold
        FARs, FRRs = [], []
        min_diff = float("inf")
        FAR_EER, FRR_EER = 1, 1
        accuracy_EER = 0
        threshold_EER = None
        for threshold in np.arange(0, 1, 0.01):

            # Predict and evaluate
            accuracy, FAR, FRR = predict_and_evaluate(pair_distance_model, svm_model, X_test_separated,
                                                      args.ensemble_size, args.ensemble_type, threshold=threshold, probability=True)

            # Store results
            FARs.append(FAR)
            FRRs.append(FRR)
            if np.abs(FAR - FRR) < min_diff:
                FAR_EER = FAR
                FRR_EER = FRR
                accuracy_EER = accuracy
                threshold_EER = threshold
                min_diff = np.abs(FAR - FRR)

        # Report EER and corresponding accuracy
        print("\n ---- Test Results: EER ----")
        print("Accuracy = {}".format(accuracy_EER))
        print("FAR = {}".format(FAR_EER))
        print("FRR = {}".format(FRR_EER))
        print("Threshold EER = {}".format(threshold_EER))

        # Plot FRR vs FAR
        plt.figure()
        plt.scatter(FARs, FRRs)
        plt.xlabel("FAR")
        plt.ylabel("FRR")
        plt.savefig("FRR_FAR.pdf")

    else:  # no sweep

        random.seed(1)
        accuracy, FAR, FRR = predict_and_evaluate(pair_distance_model, svm_model, X_test_separated,
                                                  args.ensemble_size, args.ensemble_type, threshold=0.5, probability=False)

        print("\n---- Test Results ----")
        print("Accuracy = {}".format(accuracy))
        print("FAR = {}".format(FAR))
        print("FRR = {}".format(FRR))

    # Save svm model
    if args.save_model_path is not None:
        with open(args.save_model_path + "svm_model.pkl", "wb") as svm_file:
            pickle.dump(svm_model, svm_file)
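
The sweep above picks the operating point where FAR and FRR are closest, i.e. the equal error rate (EER). Here is that selection rule in isolation, with made-up FAR/FRR curves purely for illustration:

import numpy as np

thresholds = np.arange(0, 1, 0.01)
FARs = np.linspace(1.0, 0.0, thresholds.size)  # illustrative: FAR falls as the threshold rises
FRRs = np.linspace(0.0, 1.0, thresholds.size)  # illustrative: FRR rises as the threshold rises

i = int(np.argmin(np.abs(FARs - FRRs)))        # index where the two error rates are closest
print("EER threshold:", thresholds[i], "FAR:", FARs[i], "FRR:", FRRs[i])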
Example #6
import numpy as np
from utils import load_X, load_y, mix, standardize, add_intercept, evaluate, evaluate1d
import matplotlib.pyplot as plt
import theano
from theano import tensor as T

PURCENT = 5  # Percentage of the set you want in the test set
NUM_FRAMES = 60
DATADIR = "/baie/corpus/emoMusic/train/"
# DATADIR = './train/'

do_regularize = False

y_, song_id, nb_of_songs = load_y(DATADIR)
X_ = load_X(DATADIR, song_id)

# Now let's mix everything so that we can take the test set and train set independently
# We need to separate PER SONG
X_train, y_train, X_test, y_test, song_id_tst = mix(X_, y_, PURCENT, NUM_FRAMES, song_id, nb_of_songs)
print X_train.shape, y_train.shape, X_test.shape, y_test.shape
# print X_train[0:3,0:3]
# print np.mean(X_train[:,0:3], axis=0), np.std(X_train[:,0:3], axis=0)
# print np.mean(X_test[:,0:3], axis=0), np.std(X_test[:,0:3], axis=0)

# with(open('train_dummy.txt', mode='w')) as infile:
#     for i in range(X_train.shape[0]):
#         s=''
#         for feat in range(3):
#             s = s + '%g '%X_train[i,feat]
#         infile.write('%s\n'%s)
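
In Example #4, standardize is fitted on the training data and the returned scaler is reused on the test data (X_test, _ = standardize(X_test, scaler)). The same pattern expressed with scikit-learn's StandardScaler (a sketch, not the repo's own helper):

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X_train)  # fit mean/std on the training set only
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)       # reuse the training statistics on the test set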
Example #7
        # Fill batch_s with batch_size consecutive rows of _train, wrapping
        # around the start of the dataset when the index runs past the end.
        index = ((step - 1) * batch_size + i) % len(_train)
        batch_s[i] = _train[index]

    return batch_s




X_train_signals_paths = [
    os.path.join(input_dir, 'train', axi) for axi in axis
]
X_test_signals_paths = [
    os.path.join(input_dir, 'test', axi) for axi in axis
]

X_train = load_X(X_train_signals_paths)
X_test = load_X(X_test_signals_paths)

y_train_path = os.path.join(input_dir, 'train', 'classification')
y_test_path = os.path.join(input_dir, 'test', 'classification')

y_train = load_Y(y_train_path)
y_test = load_Y(y_test_path)

# Input Data 

training_data_count = len(X_train)  # 7352 training series (with 50% overlap between each serie)
test_data_count = len(X_test)  # 2947 testing series
n_steps = len(X_train[0])  # 128 timesteps per series
n_input = len(X_train[0][0])  # 9 input parameters per timestep
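
Given the comments above, the loaded arrays should have the following shapes (a quick sanity check, assuming load_X/load_Y return NumPy arrays laid out as [series][timestep][signal] and [series]):

assert X_train.shape == (7352, 128, 9)   # series x timesteps x signals
assert X_test.shape == (2947, 128, 9)
assert len(y_train) == 7352 and len(y_test) == 2947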
Example #8
__author__ = 'giulio'
import utils as ut
import features_selection as fs
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import scale

if __name__ == "__main__":

    X, y = ut.load_X(), ut.load_y()
    print X.shape, y
    X = scale(X)
    clf = GradientBoostingClassifier()
    X = X[:, :11]
    fs.exaustive_selection(clf, X, y, fold=StratifiedKFold(y, n_folds=5))

Example #9
__author__ = 'giulio'
from sklearn.ensemble import GradientBoostingClassifier, ExtraTreesClassifier
import utils as ut
import numpy as np
import os
from sklearn.svm import SVC
from sklearn.preprocessing import scale, MinMaxScaler


reload(ut)

X_train, y_train = ut.load_X(), ut.load_y()

X_test = ut.load_X_test()

X = np.vstack((X_train, X_test))

X = scale(X)

X_train = X[:X_train.shape[0]]
X_test = X[X_train.shape[0]:]

# mms = MinMaxScaler()
# X = mms.fit_transform(X)

clf1 = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
clf2 = GradientBoostingClassifier(init=None, learning_rate=0.1, loss='deviance',
              max_depth=3, max_features=None, max_leaf_nodes=None,
              min_samples_leaf=1, min_samples_split=2, n_estimators=100,
Example #10
def get_subset(label):
    feature = load_X(
        '/home/t-yud/ZSL/data/ImageNet/res101_1crop_feature/{}.bin'.format(
            label))
    feature = torch.from_numpy(feature).float().cuda()
    return feature
Example #11
def main(model, test_dir):
    X_test_signals_paths = [os.path.join(test_dir, axi) for axi in axis]
    Y_test_path = os.path.join(test_dir, 'classification')

    X_test = load_X(X_test_signals_paths)
    Y_test = load_Y(Y_test_path)

    n_hidden = 32
    n_classes = 5

    lambda_loss_amount = 0.0015
    learning_rate = 0.0025

    n_steps = len(X_test[0])
    n_input = len(X_test[0][0])

    print('n_steps: {} n_input: {}'.format(n_steps, n_input))
    x = tf.placeholder(tf.float32, [None, n_steps, n_input])
    y = tf.placeholder(tf.float32, [None, n_classes])

    weights = {
        'hidden':
        tf.Variable(tf.random_normal([n_input,
                                      n_hidden])),  # Hidden layer weights
        'out': tf.Variable(tf.random_normal([n_hidden, n_classes], mean=1.0))
    }
    biases = {
        'hidden': tf.Variable(tf.random_normal([n_hidden])),
        'out': tf.Variable(tf.random_normal([n_classes]))
    }

    pred = LSTM_RNN(n_input, n_steps, n_hidden, x, weights, biases)
    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    l2 = lambda_loss_amount * sum(
        tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables()
    )  # L2 loss keeps this over-parameterized network from overfitting the data
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=y, logits=pred)) + l2  # Softmax loss
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        cost)  # Adam Optimizer

    saver = tf.train.Saver()

    with tf.Session() as sess:
        saver.restore(sess, model)
        print('Model restored')

        loss, acc = sess.run([cost, accuracy],
                             feed_dict={
                                 x: X_test,
                                 y: one_hot(Y_test)
                             })

        print("PERFORMANCE ON TEST SET: " + \
              "Batch Loss = {}".format(loss) + \
              ", Accuracy = {}".format(acc))