示例#1
0
def test_additive_chi2_sampler():
    """Check that AdditiveChi2Sampler approximates the exact kernel.

    Works on the module-level ``X`` and ``Y`` arrays. Besides the kernel
    approximation it also checks sparse input, rejection of negative input,
    rejection of an unsupported ``sample_steps`` and the handling of
    ``sample_interval``.
    """
    # Exact kernel: broadcast every row of X against every row of Y so the
    # per-feature terms 2*x*y / (x + y) are computed for all sample pairs.
    x_rows = X[:, np.newaxis, :]
    y_rows = Y[np.newaxis, :, :]
    per_feature = 2 * x_rows * y_rows / (x_rows + y_rows)

    # Summing over the feature axis leaves n_samples_x x n_samples_y.
    exact = per_feature.sum(axis=2)

    # Approximate feature map; its inner product must agree to 1 decimal.
    sampler = AdditiveChi2Sampler(sample_steps=3)
    X_dense = sampler.fit_transform(X)
    Y_dense = sampler.transform(Y)
    approximate = np.dot(X_dense, Y_dense.T)
    assert_array_almost_equal(exact, approximate, 1)

    # Sparse input must yield exactly the same features as dense input.
    X_sparse = sampler.fit_transform(csr_matrix(X))
    Y_sparse = sampler.transform(csr_matrix(Y))
    assert_array_equal(X_dense, X_sparse.A)
    assert_array_equal(Y_dense, Y_sparse.A)

    # A negative entry must make transform raise.
    Y_negative = Y.copy()
    Y_negative[0, 0] = -1
    assert_raises(ValueError, sampler.transform, Y_negative)

    # sample_steps=4 without an explicit interval must make fit raise.
    sampler = AdditiveChi2Sampler(sample_steps=4)
    assert_raises(ValueError, sampler.fit, X)

    # For every step count with a built-in interval, the constructor
    # parameter stays None and fit populates the fitted attribute.
    for n_steps in (1, 2, 3):
        sampler = AdditiveChi2Sampler(sample_steps=n_steps)
        assert sampler.sample_interval is None

        sampler.fit(X)
        assert sampler.sample_interval_ is not None

    # An explicit interval is kept as given, before and after fit.
    interval = 0.3
    sampler = AdditiveChi2Sampler(sample_steps=4, sample_interval=interval)
    assert sampler.sample_interval == interval
    sampler.fit(X)
    assert sampler.sample_interval_ == interval
示例#2
0
class _AdditiveChi2SamplerImpl:
    """Thin wrapper that forwards ``fit`` and ``transform`` to an ``Op``."""

    def __init__(self, **hyperparams):
        """Store the hyperparameters and build the wrapped ``Op`` from them."""
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model; ``y`` is forwarded only when given.

        Returns ``self`` so calls can be chained.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's transformation of ``X``."""
        transformed = self._wrapped_model.transform(X)
        return transformed
def test_additive_chi2_sampler():
    """Check that AdditiveChi2Sampler approximates the kernel on X and Y."""
    # Exact kernel, using broadcast views to keep the formula short:
    # every row of X is paired with every row of Y.
    x_view = X[:, np.newaxis, :]
    y_view = Y[np.newaxis, :, :]
    terms = 2 * x_view * y_view / (x_view + y_view)

    # Collapse the feature axis: result is n_samples_x x n_samples_y.
    exact = terms.sum(axis=2)

    # Approximate kernel from the explicit feature map.
    mapper = AdditiveChi2Sampler(sample_steps=3)
    X_mapped = mapper.fit_transform(X)
    Y_mapped = mapper.transform(Y)
    approximate = np.dot(X_mapped, Y_mapped.T)

    assert_array_almost_equal(exact, approximate, 1)

    # Sparse input must produce exactly the same mapping as dense input.
    X_mapped_sparse = mapper.fit_transform(csr_matrix(X))
    Y_mapped_sparse = mapper.transform(csr_matrix(Y))

    assert_array_equal(X_mapped, X_mapped_sparse.A)
    assert_array_equal(Y_mapped, Y_mapped_sparse.A)
示例#4
0
 def fit(self, X, y=None):
     """Build a new SKLModel from the stored hyperparameters and fit it.

     ``y`` is forwarded to the model only when it is not None.
     Returns ``self``.
     """
     model = SKLModel(**self._hyperparams)
     self._sklearn_model = model
     fit_args = (X,) if y is None else (X, y)
     model.fit(*fit_args)
     return self
示例#5
0
def test_additivechi2sampler_get_feature_names_out():
    """Verify the output feature names produced by AdditiveChi2Sampler."""
    random_state = np.random.RandomState(0)
    X = random_state.random_sample(size=(300, 3))

    fitted_sampler = AdditiveChi2Sampler(sample_steps=3).fit(X)
    input_names = ["f0", "f1", "f2"]

    # Names are grouped by component: all inputs for "sqrt", then all inputs
    # for "cos1", and so on.
    components = ["sqrt", "cos1", "sin1", "cos2", "sin2"]
    expected_names = [
        f"additivechi2sampler_{feature}_{component}"
        for component in components
        for feature in input_names
    ]

    names_out = fitted_sampler.get_feature_names_out(
        input_features=input_names
    )
    assert_array_equal(names_out, expected_names)
示例#6
0
class AdditiveChi2SamplerImpl():
    """Wrapper that delegates ``fit`` and ``transform`` to an ``Op`` instance."""

    def __init__(self, sample_steps=2, sample_interval=None):
        """Record the hyperparameters and construct the wrapped ``Op``."""
        self._hyperparams = dict(
            sample_steps=sample_steps,
            sample_interval=sample_interval,
        )
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model, passing ``y`` along only when provided.

        Returns ``self``.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def transform(self, X):
        """Return the wrapped model's transformation of ``X``."""
        transformed = self._wrapped_model.transform(X)
        return transformed
示例#7
0
def test_additive_chi2_sampler_exceptions():
    """Check the error messages raised for negative input in fit/transform."""
    sampler = AdditiveChi2Sampler()
    X_negative = X.copy()
    X_negative[0, 0] = -1

    fit_message = "X in AdditiveChi2Sampler.fit"
    with pytest.raises(ValueError, match=fit_message):
        sampler.fit(X_negative)

    transform_message = "X in AdditiveChi2Sampler.transform"
    with pytest.raises(ValueError, match=transform_message):
        # The sampler is fitted on valid data first, then fed negative data.
        sampler.fit(X)
        sampler.transform(X_negative)
示例#8
0
def test_input_validation():
    """Regression test: kernel approximation transformers accept lists.

    There are no assertions; the test passes as long as nothing raises.
    """
    rows = [[1, 2], [3, 4], [5, 6]]
    for sampler_class in (AdditiveChi2Sampler, SkewedChi2Sampler, RBFSampler):
        sampler_class().fit(rows).transform(rows)

    # RBFSampler is additionally exercised on sparse input.
    sparse_rows = csr_matrix(rows)
    RBFSampler().fit(sparse_rows).transform(sparse_rows)
示例#9
0
def find_chord(model, file, code):
    """Predict the chord for the audio in ``file``.

    The signal is read with scipy, band-pass filtered between 20 and 7000,
    converted with ``mPCP`` and wrapped into a single-sample array. When
    ``code == 1`` the features are passed through an AdditiveChi2Sampler
    before ``model.predict``. The first prediction is converted with
    ``NtoC`` and returned.
    """
    sample_rate, signal = scipy.io.wavfile.read(file)
    filtered = bandpass_filter(signal, 20, 7000, sample_rate, order=5)
    features = mPCP(filtered, sample_rate)
    sampler = AdditiveChi2Sampler()
    # The model expects a batch, so wrap the single feature vector.
    batch = np.array([features])
    if code == 1:
        batch = sampler.fit_transform(batch)
    predictions = model.predict(batch)
    return NtoC(predictions[0])
示例#10
0
def approx_kernel(kernel_structure, data_x, data_y):
    """Map ``data_x`` to approximate kernel features.

    The kernel is selected by the ``'kernel_type'`` entry of the first row of
    ``kernel_structure``:

    * ``'RBF'``   -- RBFSampler(gamma=1, n_components=10, random_state=1)
    * ``'ACHI2'`` -- AdditiveChi2Sampler(sample_steps=10, sample_interval=1)

    Parameters
    ----------
    kernel_structure : object supporting ``.iloc[0].loc['kernel_type']``
    data_x : input samples to transform.
    data_y : targets, forwarded to ``fit_transform`` for the ACHI2 sampler.

    Returns
    -------
    The transformed features returned by the sampler's ``fit_transform``.

    Raises
    ------
    ValueError
        If the kernel type is not one of the implemented ones. Previously
        this case failed with an unbound-local error on ``X_features``.
    """
    kernel_type = kernel_structure.iloc[0].loc['kernel_type']

    if kernel_type == 'RBF':
        rbf_feature = RBFSampler(gamma=1, n_components=10, random_state=1)
        return rbf_feature.fit_transform(data_x)

    if kernel_type == 'ACHI2':
        chi2sampler = AdditiveChi2Sampler(sample_steps=10, sample_interval=1)
        # Bug fix: use the function arguments; the original referenced the
        # undefined names ``X`` and ``y`` here.
        return chi2sampler.fit_transform(data_x, data_y)

    # TODO: implement the other kernel approximation methods.
    raise ValueError(
        "Unsupported kernel_type for approximation: %r" % (kernel_type,)
    )
示例#11
0
def transform_chi2(data):
    """Apply an additive chi2 feature map to every entry of ``data.X``.

    The layout of the entries is decided from the first one:

    * a numpy array: each entry is transformed and cast to float32;
    * a length-2 or length-3 sequence: only the first element is
      transformed, the remaining elements are carried over unchanged.

    Returns a new ``DataBunch`` with the transformed X and the original
    ``Y``, ``file_names`` and ``superpixels``.

    Raises ValueError for any other entry length.
    """
    chi2 = AdditiveChi2Sampler(sample_steps=2)
    first_entry = data.X[0]

    if isinstance(first_entry, np.ndarray):
        X_new = [chi2.fit_transform(x).astype(np.float32) for x in data.X]
    else:
        n_parts = len(first_entry)
        if n_parts == 2:
            X_new = [(chi2.fit_transform(x[0]), x[1]) for x in data.X]
        elif n_parts == 3:
            X_new = [(chi2.fit_transform(x[0]), x[1], x[2]) for x in data.X]
        else:
            raise ValueError("len(x) is weird: %d" % n_parts)

    return DataBunch(X_new, data.Y, data.file_names, data.superpixels)
def find_chord(model, file, code):
    """Predict the chord for the audio in ``file``.

    The signal is read with scipy, band-pass filtered between 20 and 7000
    and turned into a single-row feature matrix with ``mPCP``. If the
    features sum to zero, ``'__'`` is returned without calling the model.
    When ``code == 1`` the features go through an AdditiveChi2Sampler
    before prediction. The first prediction is converted with ``NtoC``.
    """
    sample_rate, signal = scipy.io.wavfile.read(file)
    filtered = bandpass_filter(signal, 20, 7000, sample_rate, order=5)
    features = mPCP(filtered, sample_rate).reshape(1, -1)
    sampler = AdditiveChi2Sampler()

    # An all-zero feature row is reported as "no chord".
    total = sum(features.ravel())
    if total == 0:
        return '__'

    if code == 1:
        features = sampler.fit_transform(features)
    predictions = model.predict(features)
    return NtoC(predictions[0])
示例#13
0
def approx_kernel(kernel_structure, data_x, data_y):
    """Map ``data_x`` to approximate kernel features and print them.

    The kernel is selected by the ``'kernel_type'`` entry of the first row of
    ``kernel_structure``:

    * ``'RBF'``   -- RBFSampler(gamma=1, random_state=1)
    * ``'ACHI2'`` -- AdditiveChi2Sampler(sample_steps=10, sample_interval=1)

    Parameters
    ----------
    kernel_structure : object supporting ``.iloc[0].loc['kernel_type']``
    data_x : input samples to transform.
    data_y : targets, forwarded to ``fit_transform`` for the ACHI2 sampler.

    Returns
    -------
    The transformed features returned by the sampler's ``fit_transform``.

    Raises
    ------
    ValueError
        If the kernel type is not one of the implemented ones. Previously
        this case failed with an unbound-local error on ``X_features``.
    """
    # The leftover pdb.set_trace() breakpoints were removed: they stopped
    # every call and cannot work in a non-interactive run.
    print("A")
    kernel_type = kernel_structure.iloc[0].loc['kernel_type']

    if kernel_type == 'RBF':
        rbf_feature = RBFSampler(gamma=1, random_state=1)
        X_features = rbf_feature.fit_transform(data_x)
    elif kernel_type == 'ACHI2':
        chi2sampler = AdditiveChi2Sampler(sample_steps=10, sample_interval=1)
        # Bug fix: use the function arguments; the original referenced the
        # undefined names ``X`` and ``y`` here.
        X_features = chi2sampler.fit_transform(data_x, data_y)
    else:
        raise ValueError(
            "Unsupported kernel_type for approximation: %r" % (kernel_type,)
        )
    print(X_features)

    return X_features
def test_additive_chi2_sampler():
    """Check that AdditiveChi2Sampler approximates the kernel on X and Y.

    Also checks sparse/dense agreement and that negative input and an
    unsupported ``sample_steps`` raise ValueError.
    """
    # Exact kernel: pair every row of X with every row of Y via broadcasting.
    x_expanded = X[:, np.newaxis, :]
    y_expanded = Y[np.newaxis, :, :]
    pairwise_terms = 2 * x_expanded * y_expanded / (x_expanded + y_expanded)

    # Summing over features reduces to n_samples_x x n_samples_y.
    exact = pairwise_terms.sum(axis=2)

    # Approximate kernel from the explicit feature map.
    feature_map = AdditiveChi2Sampler(sample_steps=3)
    X_features = feature_map.fit_transform(X)
    Y_features = feature_map.transform(Y)
    approximate = np.dot(X_features, Y_features.T)

    assert_array_almost_equal(exact, approximate, 1)

    # Sparse and dense inputs must be mapped to identical features.
    X_features_sparse = feature_map.fit_transform(csr_matrix(X))
    Y_features_sparse = feature_map.transform(csr_matrix(Y))

    assert_array_equal(X_features, X_features_sparse.A)
    assert_array_equal(Y_features, Y_features_sparse.A)

    # Negative input must be rejected by transform.
    Y_negative = Y.copy()
    Y_negative[0, 0] = -1
    assert_raises(ValueError, feature_map.transform, Y_negative)

    # sample_steps=4 must be rejected by fit.
    feature_map = AdditiveChi2Sampler(sample_steps=4)
    assert_raises(ValueError, feature_map.fit, X)
    def generate_data_transformers(self):
        """Build the optional preprocessing objects configured on this instance.

        Three independent, optional stages are handled. Each is skipped when
        its selector attribute is falsy:

        * ``self.data_transform`` -- no transformer is implemented yet.
        * ``self.feature_selection`` -- ``'VAR'`` builds a VarianceThreshold
          from ``self.feature_selection_params`` and stores it in
          ``self.selector``.
        * ``self.approximation_kernel`` -- ``'RBF'`` / ``'CHI2'`` build an
          RBFSampler / AdditiveChi2Sampler from
          ``self.kernel_approximation_params`` and store it in
          ``self.approx_kernel_map``.

        Every created object gets a ``name`` attribute and a ``params``
        attribute (from ``utils.get_params_string``).

        Raises
        ------
        NotImplementedError
            For options that are recognised but not implemented
            (``data_transform`` 'EXP'/'NORM', ``feature_selection`` 'CHI2').
            The original placeholder code crashed on these with
            AttributeError / NameError.
        ValueError
            For an unrecognised option value.
        """
        # Data Transformation (Scaling, Normalization)
        if self.data_transform:
            # Bug fix: the original compared the bare name ``data_transform``
            # (missing ``self.``) and assigned ``.name`` on a str placeholder,
            # so every truthy value crashed. Fail explicitly instead.
            if self.data_transform in ('EXP', 'NORM'):
                raise NotImplementedError(
                    "data_transform %r is not implemented"
                    % (self.data_transform,))
            raise ValueError(
                "Unknown data_transform: %r" % (self.data_transform,))

        # Feature Selection (Var, Chi^2)
        if self.feature_selection:
            if self.feature_selection == 'VAR':
                selector = VarianceThreshold(**self.feature_selection_params)
                selector.name = 'VarianceThreshold'
            elif self.feature_selection == 'CHI2':
                # Previously fell through with ``selector`` unbound.
                raise NotImplementedError(
                    "feature_selection 'CHI2' is not implemented")
            else:
                raise ValueError(
                    "Unknown feature_selection: %r"
                    % (self.feature_selection,))

            selector.params = utils.get_params_string(
                self.feature_selection_params)
            self.selector = selector

        # Kernel Approximation (RBF, Chi^2)
        if self.approximation_kernel:
            if self.approximation_kernel == 'RBF':
                approx_kernel_map = RBFSampler(
                    **self.kernel_approximation_params)
                approx_kernel_map.name = 'RBFSampler'
            elif self.approximation_kernel == 'CHI2':
                approx_kernel_map = AdditiveChi2Sampler(
                    **self.kernel_approximation_params)
                approx_kernel_map.name = 'AdditiveChi2Sampler'
            else:
                raise ValueError(
                    "Unknown approximation_kernel: %r"
                    % (self.approximation_kernel,))

            # Bug fix: the attribute was misspelled as
            # ``kernelapproximation_params`` here.
            approx_kernel_map.params = utils.get_params_string(
                self.kernel_approximation_params)
            self.approx_kernel_map = approx_kernel_map
示例#16
0
def a_chi(df, drop=None, lags=1, sample_steps=2):
    """Append additive-chi2 features of the lagged frame to ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; must contain a ``"Close"`` column (passed as ``y`` to
        the sampler's ``fit_transform``).
    drop : column label, optional
        Column removed before the transformation and re-attached, unchanged,
        to the result.
    lags : int
        Number of rows to shift by; the first ``lags`` rows of ``df`` are
        dropped from the output.
    sample_steps : int
        Forwarded to ``AdditiveChi2Sampler``.

    Returns
    -------
    pandas.DataFrame
        ``df`` (without its first ``lags`` rows) concatenated column-wise
        with the transformed features, whose columns are named ``achi_<i>``.
    """
    if drop:
        keep = df[drop]
        df = df.drop([drop], axis=1)

    lagged = df.shift(lags)
    df = df.iloc[lags:, :]
    # Shifting introduces NaN rows at the top; drop them and renumber.
    lagged = lagged.dropna().reset_index(drop=True)

    chi2sampler = AdditiveChi2Sampler(sample_steps=sample_steps)
    features = chi2sampler.fit_transform(lagged, df["Close"])

    # Re-attach the index of the truncated frame so concat aligns row-wise.
    features = pd.DataFrame(features, index=df.index)
    # Bug fix: the original called ``df.add_prefix`` here, which discarded the
    # transformed features and returned a renamed copy of ``df`` instead.
    features = features.add_prefix('achi_')

    if drop:
        df = pd.concat([keep, df, features], axis=1)
    else:
        df = pd.concat([df, features], axis=1)
    return df
示例#17
0
def train_svm(C=0.1, grid=False):
    """Train a linear SVM on chi2-mapped bag-of-words superpixel features.

    Features are built on the "kTrain" split, the classifier is fitted and
    its training score printed, then per-superpixel predictions are
    evaluated on both the training data and the "kVal" split. ``grid`` is
    accepted but not used. The function ends by calling ``tracer()``.
    """
    pascal = PascalSegmentation()

    def compute_superpixels(files):
        # One SLIC segmentation per image file.
        return [
            slic_n(pascal.get_image(f), n_superpixels=100, compactness=10)
            for f in files
        ]

    def predict_per_image(bunch):
        # Predictions are made image by image on the chi2-mapped features.
        return [classifier.predict(chi2.transform(x)) for x in bunch.X]

    train_files = pascal.get_split("kTrain")
    train_superpixels = compute_superpixels(train_files)
    bow = SiftBOW(pascal, n_words=1000, color_sift=True)
    data_train = add_global_descriptor(
        bow.fit_transform(train_files, train_superpixels)
    )

    classifier = LinearSVC(C=C, dual=False, class_weight='auto')
    chi2 = AdditiveChi2Sampler()

    # Stack all images into one training matrix / label vector.
    X = np.vstack(data_train.X)
    y = np.hstack(data_train.Y)
    X = chi2.fit_transform(X)
    classifier.fit(X, y)
    print(classifier.score(X, y))
    eval_on_sp(pascal, data_train, predict_per_image(data_train),
               print_results=True)

    val_files = pascal.get_split("kVal")
    val_superpixels = compute_superpixels(val_files)
    data_val = add_global_descriptor(bow.transform(val_files, val_superpixels))
    eval_on_sp(pascal, data_val, predict_per_image(data_val),
               print_results=True)

    tracer()
示例#18
0
def chi_squared_projection(features):
    """Return the additive chi2 feature map of ``features`` as a CSR matrix.

    Parameters
    ----------
    features : array-like or scipy sparse matrix
        Input samples passed to ``AdditiveChi2Sampler.fit_transform``.

    Returns
    -------
    scipy.sparse.csr_matrix
        The transformed features in CSR format.
    """
    from scipy import sparse

    chi2_feature = AdditiveChi2Sampler()
    X_transformed = chi2_feature.fit_transform(features)

    # Robustness fix: a dense result has no ``tocsr`` method, so the original
    # unconditional ``.tocsr()`` call failed on dense input. Convert
    # explicitly in that case.
    if sparse.issparse(X_transformed):
        return X_transformed.tocsr()
    return sparse.csr_matrix(X_transformed)
示例#19
0
# Copy the 12 feature columns (named "0".."11" in the data set) into X.
while i < 12:
    X[:, i] = data_set[str(i)]
    i += 1
# Manually creating label values according to data per chord
# It is assumed that the chords are listed in the order
# A, Am, Bm, C, D, Dm, E, Em, F, G in the dataset
y = np.zeros((X.shape)[0])
counter = 0
value = 1
data_per_chord = 200
# Every consecutive run of ``data_per_chord`` rows gets the next label,
# starting at 1.
for i in range(0, (X.shape)[0]):
    if counter == data_per_chord:
        value += 1
        counter = 0
    y[i] = value
    counter += 1
sampler = AdditiveChi2Sampler()
# Comment the above sampler and uncomment the lower one to change kernels

#sampler = RBFSampler(gamma=1, random_state=1)

X = sampler.fit_transform(X)
model.fit(X, y)
filename = 'trained_ML_model_ver3.sav'
# Fit and save the model with filename.
# Fix: use context managers so the file handles are closed (and the dump is
# flushed to disk) before the file is read back.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
# Load back the model to test for training accuracy.
# NOTE: pickle.load must only be used on trusted files such as this one,
# which was written just above.
with open('trained_ML_model_ver3.sav', 'rb') as model_file:
    myModel = pickle.load(model_file)
pred = myModel.predict(X)
print(accuracy_score(pred, y))
示例#20
0
def gen_pipeline(args):
    """Build the pipeline and grid-search parameter grid for a classifier.

    Parameters
    ----------
    args : object
        Must expose ``classifier`` (one of ``'log_reg'``, ``'svm_nystrom'``,
        ``'svm_linear'``, ``'svm_chi'``; case-insensitive) and
        ``cls_weights_bool``.

    Returns
    -------
    (OVR_pipe, param_grid)
        The pipeline and its parameter grid. ``param_grid`` is a list of
        dicts, except for ``'svm_linear'`` where it is a single dict.

    Raises
    ------
    Exception
        If ``args.classifier`` is not one of the supported names.
    """
    # TODO: accept the param grid as JSON and refactor into a simplified loop.
    classifier_name = args.classifier.lower()

    if classifier_name == 'log_reg':
        param_grid = [{
            'ovr__solver': ['saga'],
            'ovr__penalty': ['l1', 'l2'],
            'ovr__C': np.logspace(0, 4, 10),
            'ovr__multi_class': ['ovr', 'multinomial']
        }, {
            'ovr__solver': ['saga'],
            'ovr__penalty': ['elasticnet'],
            'ovr__C': np.logspace(0, 4, 10),
            'ovr__multi_class': ['ovr', 'multinomial'],
            'ovr__l1_ratio': np.array([0.1, 0.3, 0.5, 0.9])
        }, {
            'ovr__solver': ['sag'],
            'ovr__penalty': ['l2'],
            'ovr__C': np.logspace(0, 4, 10),
            'ovr__multi_class': ['ovr', 'multinomial']
        }]
        OVR_pipe = Pipeline([
            ('ovr', LogisticRegression(random_state=0, max_iter=1000)),
        ])

    elif classifier_name == 'svm_nystrom':
        param_grid = [{
            'nystreum__gamma': [100, 10, 1, 0.1],
            'nystreum__n_components': [300, 60, 11],
            'nystreum__kernel': ['rbf'],
            'ovr__penalty': ['l1', 'l2'],
            'ovr__loss': ['hinge', 'modified_huber', 'perceptron']
        }, {
            'nystreum__gamma': [100, 10, 1, 0.1],
            'nystreum__n_components': [300, 60, 11],
            'nystreum__kernel': ['sigmoid', 'polynomial'],
            'ovr__penalty': ['l2'],
            'ovr__loss': ['hinge', 'modified_huber', 'perceptron']
        }]

        OVR_pipe = Pipeline(
            [
                ('nystreum', Nystroem(random_state=1)),
                ('ovr', SGDClassifier(max_iter=5000, tol=1e-3)),
            ]
        )

    elif classifier_name == 'svm_linear':
        # NOTE(review): the bagged estimator is itself a Pipeline whose only
        # step is named 'svc', so these keys presumably need an extra
        # 'svc__' level (e.g. 'ovr__base_estimator__svc__C') -- confirm
        # against the scikit-learn version in use before changing.
        param_grid = {
            'ovr__base_estimator__C': [10, 100, 1000],
            'ovr__base_estimator__kernel': ['linear']
        }

        svc_pipe = Pipeline([
            ('svc', SVC()),
        ], verbose=True)

        OVR_pipe = Pipeline([
            ('ovr', BaggingClassifier(svc_pipe)),
        ],
                            verbose=True)

    elif classifier_name == 'svm_chi':
        param_grid = [{
            'chi_sqr__sample_steps': [1, 2, 3],
            'ovr__penalty': ['l1', 'l2'],
            'ovr__loss': ['hinge', 'modified_huber', 'perceptron']
        }]

        OVR_pipe = Pipeline([
            ('chi_sqr', AdditiveChi2Sampler()),
            ('ovr', SGDClassifier(max_iter=5000, tol=1e-3)),
        ])

    else:
        raise Exception(
            "Grid seach is only possible for SVM and Logistic regression classifiers."
        )

    # The ``== True`` comparison is kept on purpose so that truthy values
    # that are not equal to True do not switch class weighting on.
    if args.cls_weights_bool == True:  # noqa: E712
        class_weight_grid = {'ovr__class_weight': ['balanced']}
        # Bug fix: ``param_grid`` is a plain dict for 'svm_linear'; iterating
        # it yielded string keys and ``str.update`` raised AttributeError.
        # NOTE(review): for 'svm_linear' the 'ovr' step is a BaggingClassifier;
        # whether it accepts 'class_weight' should be verified.
        grids = [param_grid] if isinstance(param_grid, dict) else param_grid
        for grid in grids:
            grid.update(class_weight_grid)

    return OVR_pipe, param_grid
示例#21
0
    else:  # soundnet feature
        #  X = numpy.loadtxt(os.path.join('soundnetfeat', 'result_{}.csv'.format(feat_type.split('_')[1])), delimiter=',')
        X = numpy.loadtxt(os.path.join('soundnetfeat', 'result_08.csv'),
                          delimiter=',')
#        for c in ['04', '06', '08']:
#            X_cur = numpy.loadtxt(os.path.join('soundnetfeat', 'result_{}.csv'.format(c)), delimiter=',')
#            X = numpy.concatenate((X, X_cur), axis=1)
#
    val_file = [line.strip() for line in open('../all_val.lst', 'r')]
    test_file = [line.strip() for line in open('../all_test_fake.lst', 'r')]
    val_X = X[-len(val_file) - len(test_file):-len(test_file)]
    test_X = X[-len(test_file):]

    clf = pickle.load(open(model_file, 'rb'))
    if feat_type == 'mfcc':
        chi_feature = AdditiveChi2Sampler(sample_steps=2)
        val_X = chi_feature.fit_transform(val_X)
        test_X = chi_feature.fit_transform(test_X)

    val_conf = clf.decision_function(val_X)
    test_conf = clf.decision_function(test_X)

    output_dir = output_file.split('/')[0]
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)

    numpy.savetxt(output_file, val_conf, fmt='%2.4f')

    test_file_name = '_'.join(output_file.split('/')[1].split('_')[:2]).upper()

    test_output_file = os.path.join(output_dir,
示例#22
0
 def __init__(self, **hyperparams):
     """Keep the given hyperparameters and build the wrapped ``Op`` from them."""
     self._hyperparams = hyperparams
     self._wrapped_model = Op(**hyperparams)
示例#23
0
from sklearn.cluster.bicluster import SpectralCoclustering
from sklearn.manifold.spectral_embedding_ import SpectralEmbedding
from sklearn.preprocessing.data import StandardScaler
from sklearn.manifold.t_sne import TSNE
from sklearn.linear_model.theil_sen import TheilSenRegressor
from sklearn.mixture.dpgmm import VBGMM
from sklearn.feature_selection.variance_threshold import VarianceThreshold

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)


clf_dict = {'ARDRegression':ARDRegression(),
			'AdaBoostClassifier':AdaBoostClassifier(),
			'AdaBoostRegressor':AdaBoostRegressor(),
			'AdditiveChi2Sampler':AdditiveChi2Sampler(),
			'AffinityPropagation':AffinityPropagation(),
			'AgglomerativeClustering':AgglomerativeClustering(),
			'BaggingClassifier':BaggingClassifier(),
			'BaggingRegressor':BaggingRegressor(),
			'BayesianGaussianMixture':BayesianGaussianMixture(),
			'BayesianRidge':BayesianRidge(),
			'BernoulliNB':BernoulliNB(),
			'BernoulliRBM':BernoulliRBM(),
			'Binarizer':Binarizer(),
			'Birch':Birch(),
			'CCA':CCA(),
			'CalibratedClassifierCV':CalibratedClassifierCV(),
			'DBSCAN':DBSCAN(),
			'DPGMM':DPGMM(),
			'DecisionTreeClassifier':DecisionTreeClassifier(),
示例#24
0
    ############################
    # Compute spatial histograms
    ############################
    if VERBOSE: print str(datetime.now()) + ' start computing hists'
    if (not exists(conf.histPath)) | OVERWRITE:
        hists = birdid_utils.computeHistograms(all_images, model, conf)
        savemat(conf.histPath, {'hists': hists})
    else:
        if VERBOSE: print 'using old hists from ' + conf.histPath
        hists = loadmat(conf.histPath)['hists']

    #####################
    # Compute feature map
    #####################
    if VERBOSE: print str(datetime.now()) + ' start computing feature map'
    transformer = AdditiveChi2Sampler()
    histst = transformer.fit_transform(hists)
    train_data = histst[selTrain]
    test_data = histst[selTest]

    ###########
    # Train SVM
    ###########
    if (not exists(conf.modelPath)) | OVERWRITE:
        if VERBOSE: print str(datetime.now()) + ' training liblinear svm'
        if VERBOSE == 'SVM':
            verbose = True
        else:
            verbose = False
        clf = svm.LinearSVC(C=conf.svm.C)
        if VERBOSE: print clf
    valid_datagen = ImageDataGenerator()

    train_data_dir = cwd + '/data/sorted/train'
    valid_data_dir = cwd + '/data/sorted/valid'
    test_data_dir = cwd + '/data/sorted/test'
    
    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='sparse'
        )

    train_data_n = len(os.listdir(train_data_dir + '/1')) + len(os.listdir(train_data_dir + '/0')) + len(os.listdir(train_data_dir + '/2'))

    chi_feature = AdditiveChi2Sampler()

    clf = SGDClassifier(class_weight={0:1.0, 1:1.2, 2:1.0})

    classes_ = np.array([0, 1, 2])

    rbf_feature = RBFSampler(gamma=4.0, n_components=3000)
    #rbf_feature = Nystroem(n_components=100, gamma=1.0, random_state=1)

    """
    feature_train_stack = np.zeros((100, 2048)) - 1
    label_train_stack = np.zeros((100, 1)) - 1
    for i in range(train_data_n // batch_size):
    #for i in range(2):
        print("======= data reading! =======")
        print("batch No." + str(i) )
示例#26
0
def main():
    """Train and cross-validate a bag-of-words classifier on image crops.

    For every class directory under ``const.PATH_TO_ROOT_UECFOOD256`` the
    bounding boxes listed in its ``bb_info.txt`` are cropped from the
    matching ``.jpg`` images, resized to ``const.IMAGE_SIZE`` and described
    with ``BagOfWordsDescriptor``. The features are mapped with
    ``AdditiveChi2Sampler``, scaled, and evaluated with 10-fold
    cross-validation of a ``LinearSVC``. The resulting confusion matrix is
    saved under the name ``"cm_bow"``.
    """
    VOCABULARY_SIZE = 1000
    STEP_SIZE = 4
    bow = BagOfWordsDescriptor(const.IMAGE_SIZE, VOCABULARY_SIZE, STEP_SIZE, scale_data=False)

    data = []
    target = []
    bbox_columns = ["_x1", "_y1", "_x2", "_y2"]

    for entry in os.scandir(const.PATH_TO_ROOT_UECFOOD256):
        if not entry.is_dir(follow_symlinks=False):
            continue

        bb_info = []
        read_bb_info_txt(entry.path + "/bb_info.txt", bb_info)
        df = pd.DataFrame(bb_info, columns=['_img_name', '_x1', '_y1', '_x2', '_y2', '_cat', '_abs_path'])

        # The directory name is the integer class label.
        label = int(entry.name)

        print(label)

        for image_path in glob.iglob(entry.path + '/*.jpg', recursive=False):
            # Image files are named "<integer>.jpg"; the integer is the key
            # into the bounding-box table.
            filename_without_jpg = int(os.path.basename(image_path).replace(".jpg", ''))
            # Fix: DataFrame.as_matrix was removed in pandas 1.0; selecting
            # the columns and taking ``.values`` yields the same ndarray and
            # works on old and new pandas alike.
            gt_bboxes = df.loc[df._img_name == filename_without_jpg][bbox_columns].values

            image = imread(image_path)

            for bbox in gt_bboxes:
                sub_image = get_sub_image_from_rectangle(image, bbox, True)
                sub_image = resize(sub_image, const.IMAGE_SIZE)

                data.append(bow.get_feature(sub_image))
                target.append(label)

    print(len(data), len(target))

    X, y = bow.post_process_data(data, target)

    print("X (type: %s) shape: %s || target (type: %s) shape: %s" % (X.dtype, X.shape, y.dtype, y.shape))

    # "Free memory" to avoid MemoryError: drop the references to the raw
    # features before the feature map allocates its output.
    data = []
    bow = []
    target = []
    print("gc.collect() = ", gc.collect())

    chi2 = AdditiveChi2Sampler(sample_steps=2)

    X = chi2.fit_transform(X)
    X = scale(X)

    print("X (type: %s) shape: %s || target (type: %s) shape: %s" % (X.dtype, X.shape, y.dtype, y.shape))
    classifier = LinearSVC(fit_intercept=False, dual=False)

    print(classifier)

    cv_scores = cross_val_multiple_scores(classifier,
                                          X=X,
                                          y=y,
                                          n_folds=10,
                                          n_jobs=1)

    print(cv_scores)

    save_object(cv_scores['cv_confusion_matrix'],
                "cm_bow",
                overwrite=True)
示例#27
0
 def __init__(self, sample_steps=2, sample_interval=None):
     """Record the two hyperparameters and build the wrapped ``Op``."""
     self._hyperparams = dict(
         sample_steps=sample_steps,
         sample_interval=sample_interval,
     )
     self._wrapped_model = Op(**self._hyperparams)
示例#28
0
def main(visualize=False,
         learn=False,
         actions=None,
         subjects=None,
         n_frames=220):
    # learn = True
    # learn = False
    if actions is []:
        actions = [2]
    if subjects is []:
        subjects = [2]
    # actions = [1]
    # actions = [1, 2, 3, 4, 5]
    # subjects = [1]
    if 1:
        MHAD = True
        cam = MHADPlayer(base_dir='/Users/colin/Data/BerkeleyMHAD/',
                         kinect=1,
                         actions=actions,
                         subjects=subjects,
                         reps=[1],
                         get_depth=True,
                         get_color=True,
                         get_skeleton=True,
                         fill_images=False)
    else:
        MHAD = False
        cam = KinectPlayer(base_dir='./',
                           device=2,
                           bg_subtraction=True,
                           get_depth=True,
                           get_color=True,
                           get_skeleton=True,
                           fill_images=False)
        bg = Image.open(
            '/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_A.tif')
        bg = Image.open(
            '/Users/colin/Data/JHU_RGBD_Pose/CIRL_Background_B.tif')
        cam.bgSubtraction.backgroundModel = np.array(bg.getdata()).reshape(
            [240, 320]).clip(0, 4500)
    height, width = cam.depthIm.shape
    skel_previous = None

    # clf_geo = pickle.load(open('geodesic_svm_sorted_scaled_5class.pkl'))
    # clf_color,color_approx = pickle.load(open('color_histogram_approx_svm_5class.pkl'))
    # clf_lbp,lbp_approx = pickle.load(open('lbp_histogram_approx_svm_5class.pkl'))

    face_detector = FaceDetector()
    hand_detector = HandDetector(cam.depthIm.shape)
    curve_detector = CurveDetector(cam.depthIm.shape)

    # Video writer
    # video_writer = cv2.VideoWriter("/Users/colin/Desktop/test.avi", cv2.cv.CV_FOURCC('M','J','P','G'), 15, (320,240))

    # Save Background model
    # im = Image.fromarray(cam.depthIm.astype(np.int32), 'I')
    # im.save("/Users/Colin/Desktop/k2.png")

    # Setup pose database
    append = True
    append = False
    pose_database = PoseDatabase("PoseDatabase.pkl",
                                 learn=learn,
                                 search_joints=[0, 4, 7, 10, 13],
                                 append=append)

    # Per-joint classification
    head_features = []
    hand_features = []
    feet_features = []
    joint_features = {
        'geodesic': [None] * 14,
        'color_histograms': [None] * 14,
        'lbp': [None] * 14
    }

    # Evaluation
    accuracy_all = []
    joint_accuracy_all = []
    geo_accuracy = []
    color_accuracy = []
    lbp_accuracy = []

    frame_count = 0
    frame_rate = 2
    if not MHAD:
        cam.next(350)
    frame_prev = 0
    try:
        # if 1:
        while cam.next(frame_rate):  # and frame_count < n_frames:
            if frame_count - frame_prev > 100:
                print ""
                print "Frame #{0:d}".format(frame_count)
                frame_prev = frame_count

            if not MHAD:
                if len(cam.users) == 0:
                    continue
                else:
                    # cam.users = [np.array(cam.users[0]['jointPositions'].values())]
                    if np.any(cam.users[0][0] == -1):
                        continue
                    cam.users[0][:, 1] *= -1
                    cam.users_uv_msr = [
                        cam.camera_model.world2im(cam.users[0], [240, 320])
                    ]

            # Apply mask to image
            if MHAD:
                mask = cam.get_person(2) > 0
            else:
                mask = cam.get_person() > 0
                if np.all(mask == False):
                    continue

            im_depth = cam.depthIm
            cam.depthIm[cam.depthIm > 3000] = 0
            im_color = cam.colorIm * mask[:, :, None]
            cam.colorIm *= mask[:, :, None]
            pose_truth = cam.users[0]
            pose_truth_uv = cam.users_uv_msr[0]

            # Get bounding box around person
            box = nd.find_objects(mask)[0]
            d = 20
            # Widen box
            box = (slice(np.maximum(box[0].start-d, 0), \
              np.minimum(box[0].stop+d, height-1)), \
                slice(np.maximum(box[1].start-d, 0), \
              np.minimum(box[1].stop+d, width-1)))
            box_corner = [box[0].start, box[1].start]
            ''' ---------- ----------------------------------- --------'''
            ''' ----------- Feature Detector centric approach ---------'''
            ''' ---------- ----------------------------------- --------'''
            ''' ---- Calculate Detectors ---- '''
            # Face detection
            face_detector.run(im_color[box])
            # Skin detection
            hand_markers = hand_detector.run(im_color[box], n_peaks=3)
            # curve detection
            # curve_markers = curve_detector.run((im_depth*mask)[box], n_peaks=3)
            # Calculate LBPs ##Max P=31 for LBPs becuase of datatype
            # x = local_occupancy_pattern(cam.depthIm[box]*mask[box], [5,5,5],[3,3,3])
            # lop_texture = local_binary_pattern_depth(cam.depthIm[box]*mask[box], 10, 20, px_diff_thresh=100)*mask[box]
            # lop_markers = []#peak_local_max(lop_texture, min_distance=20, num_peaks=5, exclude_border=False)
            # lbp_texture = local_binary_pattern(cam.depthIm[box]*mask[box], 6, 20)*mask[box]
            # Calculate Geodesic Extrema
            im_pos = cam.camera_model.im2PosIm(
                cam.depthIm * mask)[box] * mask[box][:, :, None]
            geodesic_markers = geodesic_extrema_MPI(im_pos,
                                                    iterations=5,
                                                    visualize=False)
            # geodesic_markers, geo_map = geodesic_extrema_MPI(im_pos, iterations=5, visualize=True)
            geodesic_markers_pos = im_pos[geodesic_markers[:, 0],
                                          geodesic_markers[:, 1]]

            markers = list(geodesic_markers) + list(
                hand_markers)  #+ list(lop_markers) + curve_markers
            markers = np.array([list(x) for x in markers])

            if 1:
                ''' ---- Database lookup ---- '''
                pts_mean = im_pos[(im_pos != 0)[:, :, 2]].mean(0)
                if learn:
                    # Normalize pose
                    pose_uv = cam.users_uv[0]
                    if np.any(pose_uv == 0):
                        print "skip"
                        frame_count += frame_rate
                        continue
                    # print pose_truth[2], pts_mean
                    pose_database.update(pose_truth - pts_mean)

                else:
                    # Concatenate markers
                    markers = list(geodesic_markers) + hand_markers
                    # markers = list(geodesic_markers) + list(lop_markers) + curve_markers + hand_markers
                    markers = np.array([list(x) for x in markers])

                    # Normalize pose
                    pts = im_pos[markers[:, 0], markers[:, 1]]
                    pts = np.array([x for x in pts if x[0] != 0])
                    pts -= pts_mean

                    # Get closest pose
                    pose = pose_database.query(pts, knn=1)
                    # pose = pose_database.weighted_query(pts, knn=1)

                    # pose = pose_database.reverse_query(pts[:,[1,0,2]])

                    # im_pos -= pts_mean
                    # R,t = IterativeClosestPoint(pose, im_pos.reshape([-1,3])-pts_mean, max_iters=5, min_change=.001, pt_tolerance=10000)
                    # pose = np.dot(R.T, pose.T).T - t
                    # pose = np.dot(R, pose.T).T + t

                    pose += pts_mean
                    pose_uv = cam.camera_model.world2im(
                        pose, cam.depthIm.shape)

                    # Constrain
                    if 0:
                        try:
                            ''' This does worse because the joint may fall to a different part of the body (e.g. hand to torso) which throws the error upward '''

                            surface_map = nd.distance_transform_edt(
                                im_pos[:, :, 2] == 0,
                                return_distances=False,
                                return_indices=True)
                            pose_uv[:, :2] = surface_map[:, pose_uv[:, 0] -
                                                         box_corner[0],
                                                         pose_uv[:, 1] -
                                                         box_corner[1]].T + [
                                                             box_corner[0],
                                                             box_corner[1]
                                                         ]
                            pose = cam.camera_model.im2world(pose_uv)

                            # skel_current = link_length_constraints(skel_current, constraint_links, constraint_values, alpha=.5)
                            # skel_current = geometry_constraints(skel_current, joint_size, alpha=0.5)
                            # skel_current = collision_constraints(skel_current, constraint_links)
                            # embed()
                            # pose_uv_box = pose_uv - [box_corner[0], box_corner[1], 0]
                            # pose_uv_box = pose_uv_box.clip([0,0,0], [cam.depthIm.shape[0]-1, cam.depthIm.shape[1]-1, 9999])
                            # joint_size = np.array([75]*14)
                            # pose_n, pose_uv_n = ray_cast_constraints(pose, pose_uv_box, im_pos, surface_map, joint_size)
                            # print 'Pose',pose,pose_n
                            # pose = pose_n
                            # pose_uv = pose_uv_n + [box_corner[0], box_corner[1], 0]

                        except:
                            print 'error constraining'

                    # skel_previous = np.array(pose, copy=True)

            display_markers(cam.colorIm,
                            hand_markers[:2],
                            box,
                            color=(0, 250, 0))
            if len(hand_markers) > 2:
                display_markers(cam.colorIm, [hand_markers[2]],
                                box,
                                color=(0, 200, 0))
            display_markers(cam.colorIm,
                            geodesic_markers,
                            box,
                            color=(200, 0, 0))
            # display_markers(cam.colorIm, curve_markers, box, color=(0,100,100))
            # display_markers(cam.colorIm, lop_markers, box, color=(0,0,200))

            if 0:
                ''' ---------- ----------------------------------- --------'''
                ''' ---------- Feature Descriptor centric approach --------'''
                ''' ---------- ----------------------------------- --------'''
                ''' ---- Calculate Descriptors ---- '''
                hand_markers = np.array(hand_markers)
                # Geodesics
                geodesic_features = relative_marker_positions(
                    im_pos, geodesic_markers_pos[:, [1, 0, 2]])
                geodesic_features = np.sort(geodesic_features)
                # Color Histogram
                skin = skimage.exposure.rescale_intensity(
                    hand_detector.im_skin, out_range=[0, 255]).astype(np.uint8)
                color_histograms = local_histograms(
                    skin, n_bins=5, max_bound=255,
                    patch_size=11) * mask[box][:, :, None]
                # LBP Histogram
                lbp_texture = local_binary_pattern(
                    cam.depthIm[box] * mask[box], 6, 5) * mask[box]
                lbp_histograms = local_histograms(
                    lbp_texture.astype(np.uint8),
                    n_bins=10,
                    max_bound=2**6,
                    patch_size=11) * mask[box][:, :, None]
                # for i in range(10):
                # 	subplot(2,5,i+1)
                # 	imshow(lbp_histograms[:,:,i])
                ''' ---- Per Joint Learning ---- '''
                if learn:
                    for ii, i in enumerate(pose_truth_uv):
                        if i[0] != 0:
                            try:
                                if joint_features['geodesic'][ii] is None:
                                    joint_features['geodesic'][
                                        ii] = geodesic_features[i[1] -
                                                                box_corner[0],
                                                                i[0] -
                                                                box_corner[1]]
                                else:
                                    joint_features['geodesic'][ii] = np.vstack(
                                        [
                                            joint_features['geodesic'][ii],
                                            (geodesic_features[i[1] -
                                                               box_corner[0],
                                                               i[0] -
                                                               box_corner[1]])
                                        ])

                                if joint_features['color_histograms'][
                                        ii] is None:
                                    joint_features['color_histograms'][
                                        ii] = color_histograms[i[1] -
                                                               box_corner[0],
                                                               i[0] -
                                                               box_corner[1]]
                                else:
                                    joint_features['color_histograms'][
                                        ii] = np.vstack([
                                            joint_features['color_histograms']
                                            [ii],
                                            deepcopy(color_histograms[
                                                i[1] - box_corner[0],
                                                i[0] - box_corner[1]])
                                        ])

                                if joint_features['lbp'][ii] is None:
                                    joint_features['lbp'][ii] = lbp_histograms[
                                        i[1] - box_corner[0],
                                        i[0] - box_corner[1]]
                                else:
                                    joint_features['lbp'][ii] = np.vstack([
                                        joint_features['lbp'][ii],
                                        deepcopy(lbp_histograms[i[1] -
                                                                box_corner[0],
                                                                i[0] -
                                                                box_corner[1]])
                                    ])

                            except:
                                print "error"
                ''' ---- Per Joint Classification ---- '''
                if not learn:
                    try:
                        # Geodesic clasification
                        tmp = geodesic_features.reshape([-1, 6])
                        tmp = np.array([x / x[-1] for x in tmp])
                        tmp = np.nan_to_num(tmp)
                        geo_clf_map = clf_geo.predict(tmp).reshape(
                            im_pos.shape[:2]) * mask[box]
                        geo_clf_labels = geo_clf_map[
                            pose_truth_uv[[0, 1, 4, 7, 10, 13], 1] -
                            box_corner[0],
                            pose_truth_uv[[0, 1, 4, 7, 10, 13], 0] -
                            box_corner[1]]
                        geo_accuracy += [
                            geo_clf_labels == [0, 1, 4, 7, 10, 13]
                        ]
                        print 'G', np.mean(
                            geo_accuracy,
                            0), geo_clf_labels == [0, 1, 4, 7, 10, 13]
                        cv2.imshow('Geo',
                                   geo_clf_map / float(geo_clf_map.max()))
                    except:
                        pass

                    try:
                        # Color histogram classification
                        color_test = color_approx.transform(
                            color_histograms.reshape([-1, 5]))
                        color_clf_map = clf_color.predict(color_test).reshape(
                            im_pos.shape[:2]) * mask[box]
                        color_clf_labels = color_clf_map[
                            pose_truth_uv[[0, 1, 4, 7, 10, 13], 1] -
                            box_corner[0],
                            pose_truth_uv[[0, 1, 4, 7, 10, 13], 0] -
                            box_corner[1]]
                        color_accuracy += [
                            color_clf_labels == [0, 1, 4, 7, 10, 13]
                        ]
                        print 'C', np.mean(
                            color_accuracy,
                            0), color_clf_labels == [0, 1, 4, 7, 10, 13]
                        cv2.imshow('Col',
                                   color_clf_map / float(color_clf_map.max()))
                    except:
                        pass

                    try:
                        # lbp histogram classification
                        lbp_test = color_approx.transform(
                            lbp_histograms.reshape([-1, 10]))
                        lbp_clf_map = clf_lbp.predict(lbp_test).reshape(
                            im_pos.shape[:2]) * mask[box]
                        lbp_clf_labels = lbp_clf_map[
                            pose_truth_uv[[0, 1, 4, 7, 10, 13], 1] -
                            box_corner[0],
                            pose_truth_uv[[0, 1, 4, 7, 10, 13], 0] -
                            box_corner[1]]
                        lbp_accuracy += [
                            lbp_clf_labels == [0, 1, 4, 7, 10, 13]
                        ]
                        print 'L', np.mean(
                            lbp_accuracy,
                            0), lbp_clf_labels == [0, 1, 4, 7, 10, 13]
                        cv2.imshow('LBP',
                                   lbp_clf_map / float(lbp_clf_map.max()))
                    except:
                        pass

                pose_uv = pose_truth_uv
                pose = pose_truth

            # ''' ---- Accuracy ---- '''
            if 1 and not learn:
                # pose_truth = cam.users[0]
                error = pose_truth - pose
                # print "Error", error
                error_l2 = np.sqrt(np.sum(error**2, 1))
                # error_l2 = np.sqrt(np.sum(error[:,:2]**2, 1))
                joint_accuracy_all += [error_l2]
                accuracy = np.sum(error_l2 < 150) / 14.
                accuracy_all += [accuracy]
                print "Current", accuracy
                # print "Running avg:", np.mean(accuracy_all)
                # print "Joint avg (per-joint):", np.mean(joint_accuracy_all, -1)
                # print "Joint avg (overall):", np.mean(joint_accuracy_all)
            ''' --- Visualization --- '''
            cam.colorIm = display_skeletons(cam.colorIm,
                                            pose_truth_uv,
                                            skel_type='Kinect',
                                            color=(0, 255, 0))
            cam.colorIm = display_skeletons(cam.colorIm,
                                            pose_uv,
                                            skel_type='Kinect')
            cam.visualize()

            # print "Extrema:", geo_clf_map[geodesic_markers[:,0], geodesic_markers[:,1]]
            # print "Skin:", geo_clf_map[hand_markers[:,0], hand_markers[:,1]]
            # print "Skin val:", hand_detector.skin_match[hand_markers[:,0], hand_markers[:,1]]
            # hand_data += [[x[0] for x in hand_markers],
            # [x[1] for x in hand_markers],
            # list(hand_detector.skin_match[hand_markers[:,0], hand_markers[:,1]])]

            # ------------------------------------------------------------

            # video_writer.write((geo_clf_map/float(geo_clf_map.max())*255.).astype(np.uint8))
            # video_writer.write(cam.colorIm[:,:,[2,1,0]])

            frame_count += frame_rate
    except:
        pass

    print "-- Results for subject {:d} action {:d}".format(
        subjects[0], actions[0])
    print "Running avg:", np.mean(accuracy_all)
    print "Joint avg (overall):", np.mean(joint_accuracy_all)
    # print 'Done'
    if learn:
        pose_database.save()
        print 'Pose database saved'

    embed()
    return
    ''' --- Format Geodesic features ---'''
    geodesics_train = []
    geodesics_labels = []
    for i in xrange(len(joint_features['geodesic'])):
        # joint_features['geodesic'][i] = np.array([np.sort(x) for x in joint_features['geodesic'][i] if x[0] != 0])
        joint_features['geodesic'][i] = np.array(
            [x / x.max() for x in joint_features['geodesic'][i] if x[0] != 0])
        ii = i
        if i not in [0, 1, 4, 7, 10, 13]:
            ii = 1
        else:
            geodesics_labels += [
                i * np.ones(len(joint_features['geodesic'][i]))
            ]
    geodesics_train = np.vstack(
        [joint_features['geodesic'][x] for x in [0, 1, 4, 7, 10, 13]])
    # geodesics_train = np.vstack(joint_features['geodesic'])
    geodesics_labels = np.hstack(geodesics_labels)

    figure(1)
    title('Distances of each joint to first 6 geodesic extrema')
    for i in range(14):
        subplot(4, 4, i + 1)
        ylabel('Distance')
        xlabel('Sample')
        plot(joint_features['geodesic'][i])
        axis([0, 400, 0, 1600])

    # Learn geodesic classifier
    clf_geo = SGDClassifier(n_iter=10000,
                            alpha=.01,
                            n_jobs=-1,
                            class_weight='auto')
    clf_geo.fit(geodesics_train, geodesics_labels)
    print clf_geo.score(geodesics_train, geodesics_labels)
    geodesic_features = np.sort(geodesic_features)
    sgd_map = clf_geo.predict(geodesic_features.reshape([-1, 6])).reshape(
        im_pos.shape[:2])

    pickle.dump(clf_geo, open('geodesic_svm_sorted_scaled_5class.pkl', 'w'),
                pickle.HIGHEST_PROTOCOL)
    # clf_geo = pickle.load(open('geodesic_svm_sorted_scaled_5class.pkl'))
    ''' --- Color Histogram features ---'''
    color_train = []
    color_labels = []
    for i in xrange(len(joint_features['color_histograms'])):
        ii = i
        if i not in [0, 1, 4, 7, 10, 13]:
            ii = 1
        else:
            color_labels += [
                i * np.ones(len(joint_features['color_histograms'][i]))
            ]
        # color_labels += [i*np.ones(len(joint_features['color_histograms'][i]))]
    # color_train = np.vstack(joint_features['color_histograms'])
    color_train = np.vstack(
        [joint_features['color_histograms'][x] for x in [0, 1, 4, 7, 10, 13]])
    color_labels = np.hstack(color_labels)

    color_approx = AdditiveChi2Sampler()
    color_approx_train = color_approx.fit_transform(color_train)
    clf = SGDClassifier(n_iter=10000,
                        alpha=.01,
                        n_jobs=-1,
                        class_weight='auto')
    clf.fit(color_approx_train, color_labels)
    print clf.score(color_approx_train, color_labels)
    color_test = color_approx.transform(color_histograms.reshape([-1, 5]))
    sgd_map = clf.predict(color_test).reshape(im_pos.shape[:2]) * mask[box]

    figure(1)
    title('Color Histograms per Joint')
    for i in range(14):
        subplot(4, 4, i + 1)
        ylabel('Count')
        xlabel('Sample')
        plot(joint_features['color_histograms'][i])
        axis([0, 10, 0, 30])

    for i in range(5):
        subplot(1, 5, i + 1)
        imshow(color_histograms[:, :, i])

    pickle.dump([clf, color_approx],
                open('color_histogram_approx_svm_5class.pkl', 'w'),
                pickle.HIGHEST_PROTOCOL)
    # clf_color,color_approx = pickle.load(open('color_histogram_approx_svm_5class.pkl'))
    ''' --- LBP Histogram features ---'''
    color_train = []
    color_labels = []
    for i in xrange(len(joint_features['lbp'])):
        ii = i
        if i not in [0, 1, 4, 7, 10, 13]:
            ii = 1
        else:
            color_labels += [i * np.ones(len(joint_features['lbp'][i]))]
        # color_labels += [i*np.ones(len(joint_features['color_histograms'][i]))]
    # color_train = np.vstack(joint_features['color_histograms'])
    color_train = np.vstack(
        [joint_features['lbp'][x] for x in [0, 1, 4, 7, 10, 13]])
    color_labels = np.hstack(color_labels)

    color_approx = AdditiveChi2Sampler()
    color_approx_train = color_approx.fit_transform(color_train)
    clf = SGDClassifier(n_iter=10000,
                        alpha=.01,
                        n_jobs=-1,
                        class_weight='auto')
    clf.fit(color_approx_train, color_labels)
    print clf.score(color_approx_train, color_labels)
    color_test = color_approx.transform(lbp_histograms.reshape([-1, 10]))
    sgd_map = clf.predict(color_test).reshape(im_pos.shape[:2]) * mask[box]

    figure(1)
    title('LBP Histograms per Joint')
    for i in range(14):
        subplot(4, 4, i + 1)
        ylabel('Count')
        xlabel('Sample')
        plot(joint_features['lbp'][i])
        axis([0, 10, 0, 30])

    for i in range(5):
        subplot(1, 5, i + 1)
        imshow(color_histograms[:, :, i])

    pickle.dump([clf, color_approx],
                open('lbp_histogram_approx_svm_5class.pkl', 'w'),
                pickle.HIGHEST_PROTOCOL)
示例#29
0
def cross_validate_bow(filename):
    """Grid-search SVC hyper-parameters on chi2-mapped features and report.

    Adapted from this example:
    http://scikit-learn.org/stable/auto_examples/grid_search_digits.html#example-grid-search-digits-py

    The features are mapped with an ``AdditiveChi2Sampler`` and split 50/50
    into a development and an evaluation set.  For each scoring function
    (precision, then recall) a ``GridSearchCV`` over RBF and linear ``SVC``
    settings is fitted on the development half; the best estimator, the
    per-setting grid scores, a classification report on the evaluation half
    and the best score are printed.

    Parameters
    ----------
    filename
        Not used anywhere in this function's body.

    Returns
    -------
    None
        Results are only printed.

    Notes
    -----
    ``hogsH`` and ``labels`` are not defined in this function; presumably
    they are module-level globals -- TODO confirm against the rest of the
    file.

    NOTE(review): ``sklearn.cross_validation``, ``sklearn.grid_search``, the
    ``score_func`` argument, ``fit(..., cv=5)`` and ``grid_scores_`` look
    like legacy scikit-learn API -- confirm against the installed version
    before upgrading the library.
    """
    from sklearn.cross_validation import train_test_split
    from sklearn.grid_search import GridSearchCV
    from sklearn.metrics import classification_report
    from sklearn.metrics import precision_score
    from sklearn.metrics import recall_score
    from sklearn.svm import SVC
    from sklearn.kernel_approximation import AdditiveChi2Sampler

    # fit_transform() fits the sampler itself, so a separate fit() call on
    # the same data beforehand is redundant.
    chi = AdditiveChi2Sampler()
    X = chi.fit_transform(hogsH, labels)

    # The split is seeded (random_state=0) and the parameter grid is
    # constant, so both are built once instead of once per scoring function.
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        labels,
                                                        test_size=0.5,
                                                        random_state=0)
    tuned_parameters = [{
        'kernel': ['rbf'],
        'gamma': [1e-3, 1e-4],
        'C': [1, 10, 100, 1000]
    }, {
        'kernel': ['linear'],
        'C': [1, 10, 100, 1000]
    }]

    score_metrics = [
        ('precision', precision_score),
        ('recall', recall_score),
    ]
    for _score_name, score_func in score_metrics:
        clf = GridSearchCV(SVC(C=1), tuned_parameters, score_func=score_func)
        clf.fit(X_train, y_train, cv=5)

        # Single-argument print(...) calls behave identically under the
        # Python 2 print statement and the Python 3 print function.
        print("Best parameters set found on development set:")
        print("")
        print(clf.best_estimator_)
        print("")
        print("Grid scores on development set:")
        print("")
        # ``fold_scores`` (not ``scores``) so the list of scoring functions
        # being iterated above is not rebound.
        for params, mean_score, fold_scores in clf.grid_scores_:
            print("%0.3f (+/-%0.03f) for %r" % (mean_score,
                                                fold_scores.std() / 2,
                                                params))
        print("")

        print("Detailed classification report:")
        print("")
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print("")
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print("")
        print("Best score: %f" % clf.best_score_)