Example #1
__author__ = "thomas"

import numpy as np
from utils import load_X, load_y, mix, standardize, add_intercept, evaluate, evaluate1d
import matplotlib.pyplot as plt
import theano
from theano import tensor as T

PURCENT = 5  # percentage of the set you want in the test set
NUM_FRAMES = 60
DATADIR = "/baie/corpus/emoMusic/train/"
# DATADIR = './train/'

do_regularize = False

y_, song_id, nb_of_songs = load_y(DATADIR)
X_ = load_X(DATADIR, song_id)

# Now let's mix everything so that we can take the test set and train set independently
# We need to separate PER SONG
X_train, y_train, X_test, y_test, song_id_tst = mix(X_, y_, PURCENT, NUM_FRAMES, song_id, nb_of_songs)
print X_train.shape, y_train.shape, X_test.shape, y_test.shape
# print X_train[0:3,0:3]
# print np.mean(X_train[:,0:3], axis=0), np.std(X_train[:,0:3], axis=0)
# print np.mean(X_test[:,0:3], axis=0), np.std(X_test[:,0:3], axis=0)

# with(open('train_dummy.txt', mode='w')) as infile:
#     for i in range(X_train.shape[0]):
#         s=''
#         for feat in range(3):
#             s = s + '%g '%X_train[i,feat]
Example #2
__author__ = 'thomas'

import numpy as np
from utils import load_X, load_y, mix, standardize, add_intercept, evaluate, evaluate1d
import matplotlib.pyplot as plt
import theano
from theano import tensor as T

PURCENT = 5  # percentage of the set you want in the test set
NUM_FRAMES = 60
DATADIR = '/baie/corpus/emoMusic/train/'
# DATADIR = './train/'

do_regularize = False

y_, song_id, nb_of_songs = load_y(DATADIR)
X_ = load_X(DATADIR, song_id)

# Now let's mix everything so that we can take the test set and train set independently
# We need to separate PER SONG
X_train, y_train, X_test, y_test, song_id_tst = mix(X_, y_, PURCENT,
                                                    NUM_FRAMES, song_id,
                                                    nb_of_songs)
print X_train.shape, y_train.shape, X_test.shape, y_test.shape
# print X_train[0:3,0:3]

# standardize data: fit the scaler on the training set, then reuse it on the test set
X_train, scaler = standardize(X_train)
X_test, _ = standardize(X_test, scaler)

X_train = X_train[:, [
Example #3
from multipipetools import average
from pipe import Pipe
from pipetools import *
from ssltools import *
from utils import load_x, load_y
from wrapper import kmeans, knn  # assumed: kmeans lives in wrapper alongside knn

clusters_count = 10
# file = './datasets/iris/iris.data'
# file_test = './datasets/iris/iris.data'
file = './datasets/pendigits/pendigits.tra'
file_test = './datasets/pendigits/pendigits.tes'

def kmeans_ssl(clusters, neighbors):
    def fn(pipe):
        p = pipe \
            .split(5) \
                .pipe(kmeans(clusters)) \
                .y(seeding_centroids(0.1)) \
                .y(label_consensus()) \
                .pipe(knn(neighbors)) \
                .pipe(predict()) \
                .pipe(evaluate()) \
            .merge('evaluation', average('evaluation'))
        return p
    return fn

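# assemble the run: load the pendigits split, time the SSL pipeline, and dump the evaluation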
p = Pipe() \
    .x(load_x(file)) \
    .y(load_y(file)) \
    .x_test(load_x(file_test))\
    .y_test(load_y(file_test))\
    .connect(start_timer()) \
    .connect(kmeans_ssl(clusters=clusters_count, neighbors=1)) \
    .connect(stop_timer()) \
    .pipe(dump('evaluation'))
Example #4
lam = args.lam
subsample_max = args.subsample_max
cheat_mode = args.cheat_mode
c_train = 2.0 / (num_reviewer * num_paper)

logger = set_logger(
    "detect_tpr",
    "{}/detect_tpr/log_detect_tpr_collusion_{}_top_{}_{}_lam_{}_subsample_max_{}_seed_{}.txt"
    .format(args.output_dir, L_attack, K, cheat_mode, lam, subsample_max,
            args.seed))
logger.info(args)

# 1. init data
X_csr_s = []
H_inv_s = []
y, y_train = load_y(hashed_ratio, logger, subsample_max=subsample_max)
preds_s = []
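# for each seed, load the hashed design matrix, its inverse Hessian, and the corresponding predictions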
for seed in seeds:
    X_csr, H_inv = load_X_and_H_inv(hashed_ratio, seed, logger, lam)
    preds = load_preds(X_csr,
                       y_train,
                       H_inv,
                       hashed_ratio,
                       seed,
                       logger,
                       lam,
                       subsample_max=subsample_max)
    X_csr_s.append(X_csr)
    H_inv_s.append(H_inv)
    preds_s.append(preds)
    del X_csr, H_inv, preds
Example #5
from multipipetools import average
from pipe import Pipe
from pipetools import *
from ssltools import *
from utils import load_x, load_y
from wrapper import agglomerative_l_method, knn

file = './datasets/iris/iris.data'
# file = './datasets/pendigits/pendigits.tra'
points = load_x(file, delimiter=',')
target = load_y(file, delimiter=',')

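# 5-fold pipeline: cluster with the agglomerative L-method, seed 10% of the labels
# at random, take a label consensus, then classify with k-NN and evaluate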
def l_method(neighbors):
    def fn(pipe):
        p = pipe \
            .split(5) \
                .pipe(agglomerative_l_method()) \
                .pipe(copy('y', 'y_bak')) \
                .y(seeding_random(0.1)) \
                .y(label_consensus()) \
                .pipe(knn(neighbors)) \
                .pipe(predict()) \
                .pipe(copy('y_bak', 'y')) \
                .pipe(evaluate()) \
            .merge('evaluation', average('evaluation'))
        return p
    return fn

p = Pipe() \
    .x(points) \
    .y(target)
Example #6
from utils import load_x, load_y
from wrapper import *

file = './datasets/pendigits/pendigits.tra'
file_test = './datasets/pendigits/pendigits.tes'

X = load_x(file)
Y = load_y(file)
X_test = load_x(file_test)
Y_test = load_y(file_test)

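# estimate a good K for k-NN from the pendigits train/test split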
goodK = Pipe()\
    .x(X)\
    .y(Y)\
    .x_test(X_test)\
    .y_test(Y_test)\
    .pipe(good_K_for_KNN())\
    .connect(stop())

print('goodK:', goodK)
Example #7
__author__ = 'giulio'
import utils as ut
import features_selection as fs
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.cross_validation import StratifiedKFold
from sklearn.preprocessing import scale

if __name__ == "__main__":

    X, y = ut.load_X(), ut.load_y()
    print X.shape, y
    X = scale(X)
    clf = GradientBoostingClassifier()
    X = X[:, :11]
    fs.exaustive_selection(clf, X, y, fold=StratifiedKFold(y, n_folds=5))

Example #8
__author__ = 'giulio'
from sklearn.ensemble import GradientBoostingClassifier, ExtraTreesClassifier
import utils as ut
import numpy as np
import os
from sklearn.svm import SVC
from sklearn.preprocessing import scale, MinMaxScaler


reload(ut)

X_train, y_train = ut.load_X(), ut.load_y()

X_test = ut.load_X_test()

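# stack train and test so both are scaled with the same statistics, then split them back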
X = np.vstack((X_train, X_test))

X = scale(X)

X_train = X[:X_train.shape[0]]
X_test = X[X_train.shape[0]:]

# mms = MinMaxScaler()
# X = mms.fit_transform(X)

clf1 = SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0,
  kernel='rbf', max_iter=-1, probability=True, random_state=None,
  shrinking=True, tol=0.001, verbose=False)
clf2 = GradientBoostingClassifier(init=None, learning_rate=0.1, loss='deviance',
              max_depth=3, max_features=None, max_leaf_nodes=None,
              min_samples_leaf=1, min_samples_split=2, n_estimators=100)
Example #9
from pipe import Pipe
from ssltools import *
from utils import load_x, load_y

file = './datasets/iris/iris.data'

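# keep 10% of the labels as seeds, spread equally across the classes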
Pipe() \
    .x(load_x(file)) \
    .y(load_y(file)) \
    .pipe(seeding_equally(0.1))