__author__ = "thomas" import numpy as np from utils import load_X, load_y, mix, standardize, add_intercept, evaluate, evaluate1d import matplotlib.pyplot as plt import theano from theano import tensor as T PURCENT = 5 # Purcentage of the set you want on the test set NUM_FRAMES = 60 DATADIR = "/baie/corpus/emoMusic/train/" # DATADIR = './train/' do_regularize = False y_, song_id, nb_of_songs = load_y(DATADIR) X_ = load_X(DATADIR, song_id) # Now let's mix everything so that we can take test_set and train_set independantly # We need to separate PER SONG X_train, y_train, X_test, y_test, song_id_tst = mix(X_, y_, PURCENT, NUM_FRAMES, song_id, nb_of_songs) print X_train.shape, y_train.shape, X_test.shape, y_test.shape # print X_train[0:3,0:3] # print np.mean(X_train[:,0:3], axis=0), np.std(X_train[:,0:3], axis=0) # print np.mean(X_test[:,0:3], axis=0), np.std(X_test[:,0:3], axis=0) # with(open('train_dummy.txt', mode='w')) as infile: # for i in range(X_train.shape[0]): # s='' # for feat in range(3): # s = s + '%g '%X_train[i,feat]
__author__ = 'thomas'

import numpy as np
from utils import load_X, load_y, mix, standardize, add_intercept, evaluate, evaluate1d
import matplotlib.pyplot as plt
import theano
from theano import tensor as T

PURCENT = 5  # Percentage of the set you want in the test set
NUM_FRAMES = 60
DATADIR = '/baie/corpus/emoMusic/train/'
# DATADIR = './train/'

do_regularize = False

y_, song_id, nb_of_songs = load_y(DATADIR)
X_ = load_X(DATADIR, song_id)

# Now let's mix everything so that we can take the test set and train set independently.
# We need to separate PER SONG.
X_train, y_train, X_test, y_test, song_id_tst = mix(X_, y_, PURCENT, NUM_FRAMES, song_id, nb_of_songs)
print X_train.shape, y_train.shape, X_test.shape, y_test.shape
# print X_train[0:3, 0:3]

# standardize data
X_train, scaler = standardize(X_train)
X_test, _ = standardize(X_test, scaler)

X_train = X_train[:, [
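# ---------------------------------------------------------------------------
# `standardize` above also lives in the project-local utils.py. A minimal
# sketch of what it presumably does, assuming it wraps scikit-learn's
# StandardScaler (fit on the training data, reuse the fitted scaler on the
# test data); this is an illustration, not the actual utils.standardize.
# ---------------------------------------------------------------------------
from sklearn.preprocessing import StandardScaler

def standardize(X, scaler=None):
    if scaler is None:
        scaler = StandardScaler()
        X = scaler.fit_transform(X)   # fit the scaler on this (training) data
    else:
        X = scaler.transform(X)       # reuse statistics from the training data
    return X, scaler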
# NOTE: imports assumed from the sibling pipeline scripts in this repo
# (pipetools/ssltools/wrapper are expected to export the helpers used below).
from multipipetools import average
from pipe import Pipe
from pipetools import *
from ssltools import *
from utils import load_x, load_y
from wrapper import *

clusters_count = 10

# file = './datasets/iris/iris.data'
# file_test = './datasets/iris/iris.data'
file = './datasets/pendigits/pendigits.tra'
file_test = './datasets/pendigits/pendigits.tes'


def kmeans_ssl(clusters, neighbors):
    def fn(pipe):
        p = pipe \
            .split(5) \
            .pipe(kmeans(clusters)) \
            .y(seeding_centroids(0.1)) \
            .y(label_consensus()) \
            .pipe(knn(neighbors)) \
            .pipe(predict()) \
            .pipe(evaluate()) \
            .merge('evaluation', average('evaluation'))
        return p
    return fn


p = Pipe() \
    .x(load_x(file)) \
    .y(load_y(file)) \
    .x_test(load_x(file_test)) \
    .y_test(load_y(file_test)) \
    .connect(start_timer()) \
    .connect(kmeans_ssl(clusters=clusters_count, neighbors=1)) \
    .connect(stop_timer()) \
    .pipe(dump('evaluation'))
lam = args.lam
subsample_max = args.subsample_max
cheat_mode = args.cheat_mode
c_train = 2.0 / (num_reviewer * num_paper)

logger = set_logger(
    "detect_tpr",
    "{}/detect_tpr/log_detect_tpr_collusion_{}_top_{}_{}_lam_{}_subsample_max_{}_seed_{}.txt"
    .format(args.output_dir, L_attack, K, cheat_mode, lam, subsample_max, args.seed))
logger.info(args)

# 1. init data
X_csr_s = []
H_inv_s = []
y, y_train = load_y(hashed_ratio, logger, subsample_max=subsample_max)
preds_s = []
for seed in seeds:
    X_csr, H_inv = load_X_and_H_inv(hashed_ratio, seed, logger, lam)
    preds = load_preds(X_csr, y_train, H_inv, hashed_ratio, seed, logger, lam,
                       subsample_max=subsample_max)
    X_csr_s.append(X_csr)
    H_inv_s.append(H_inv)
    preds_s.append(preds)
    del X_csr, H_inv, preds
from multipipetools import average
from pipe import Pipe
from pipetools import *
from ssltools import *
from utils import load_x, load_y
from wrapper import agglomerative_l_method, knn

file = './datasets/iris/iris.data'
# file = './datasets/pendigits/pendigits.tra'

points = load_x(file, delimiter=',')
target = load_y(file, delimiter=',')


def l_method(neighbors):
    def fn(pipe):
        p = pipe \
            .split(5) \
            .pipe(agglomerative_l_method()) \
            .pipe(copy('y', 'y_bak')) \
            .y(seeding_random(0.1)) \
            .y(label_consensus()) \
            .pipe(knn(neighbors)) \
            .pipe(predict()) \
            .pipe(copy('y_bak', 'y')) \
            .pipe(evaluate()) \
            .merge('evaluation', average('evaluation'))
        return p
    return fn


p = Pipe() \
    .x(points) \
    .y(target) \
from pipe import Pipe
from utils import load_x, load_y
from wrapper import *

file = './datasets/pendigits/pendigits.tra'
file_test = './datasets/pendigits/pendigits.tes'

X = load_x(file)
Y = load_y(file)
X_test = load_x(file_test)
Y_test = load_y(file_test)

goodK = Pipe() \
    .x(X) \
    .y(Y) \
    .x_test(X_test) \
    .y_test(Y_test) \
    .pipe(good_K_for_KNN()) \
    .connect(stop())

print('goodK:', goodK)
__author__ = 'giulio'

import utils as ut
import features_selection as fs
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import scale

if __name__ == "__main__":
    X, y = ut.load_X(), ut.load_y()
    print X.shape, y

    X = scale(X)
    clf = GradientBoostingClassifier()
    X = X[:, :11]

    fs.exaustive_selection(clf, X, y, fold=StratifiedKFold(y, n_folds=5))
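# ---------------------------------------------------------------------------
# The script above targets the pre-0.18 scikit-learn API
# (sklearn.cross_validation). On current scikit-learn the equivalent
# stratified 5-fold indices would be produced as sketched below; whether the
# iterator can be passed straight through as `fold` depends on how
# fs.exaustive_selection consumes it, so treat this as a sketch rather than a
# drop-in replacement.
# ---------------------------------------------------------------------------
from sklearn.model_selection import StratifiedKFold as StratifiedKFoldNew

folds = StratifiedKFoldNew(n_splits=5).split(X, y)  # yields (train_idx, test_idx) pairs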
__author__ = 'giulio'

from sklearn.ensemble import GradientBoostingClassifier, ExtraTreesClassifier
import utils as ut
import numpy as np
import os
from sklearn.svm import SVC
from sklearn.preprocessing import scale, MinMaxScaler

reload(ut)

X_train, y_train = ut.load_X(), ut.load_y()
X_test = ut.load_X_test()

X = np.vstack((X_train, X_test))
X = scale(X)
X_train = X[:X_train.shape[0]]
X_test = X[X_train.shape[0]:]

# mms = MinMaxScaler()
# X = mms.fit_transform(X)

clf1 = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
           kernel='rbf', max_iter=-1, probability=True, random_state=None,
           shrinking=True, tol=0.001, verbose=False)

clf2 = GradientBoostingClassifier(init=None, learning_rate=0.1, loss='deviance',
                                  max_depth=3, max_features=None, max_leaf_nodes=None,
                                  min_samples_leaf=1, min_samples_split=2, n_estimators=100,
from pipe import Pipe
from ssltools import *
from utils import load_x, load_y

file = './datasets/iris/iris.data'

Pipe() \
    .x(load_x(file)) \
    .y(load_y(file)) \
    .pipe(seeding_equally(0.1))