Example #1
def phoneAccelerometerISVM():
    print("Loading data...")
    data = pd.read_csv("./Train_Phone-Acc-nexus4_1-a.csv")
    print("Done!")

    # Parse data and make a sitting vs. not-sitting classification using an SVM.
    # Note: I'm assuming a window width of 500
    print("Finding time series windows indexes for each class kind...")
    previousClassLabel = str(data.at[data.index[0], 'gt'])  # .at replaces the removed DataFrame.get_value()
    pos = 0
    y = []
    X = []
    window = 500
    while pos + window <= data.shape[0]:  # stop before a partial window would make X ragged
        # Make y label.
        if str(data.iloc[pos]['gt']) == 'sit':
            y.append(1)
        else:
            y.append(-1)

        # Make X row.
        X.append(data.iloc[pos:pos + window]['y'])

        # Move to the next window
        pos += window
    print("Done!")

    # Build and fit the SVM.
    print("Training SVM on all data accelerometer data...")
    X = np.array(X)
    y = np.array(y)
    #clfs = LinearSVC()
    clfs = SVC()
    clfs.fit(X, y)
    print("Done!")

    # print("Predicting accelerometer classes on all data using SVM...")
    # ypred = predict(X, clfs.coef_.reshape(len(clfs.coef_.ravel()), 1))
    # print("Done!")
    # error = calculateTotalAbsoluteError(y, ypred) / y.shape[0]
    # print("Accelerometer training error (Means kind of nothing): %f"%error)

    # Cross validation
    print("Training SVM on accelerometer training only data...")
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.1)  # add random_state=0 for reproducibility
    clfs = SVC()
    clfs.fit(X_train, y_train)
    yhat = clfs.predict(X_test)
    print("Abs Error = %f"%( calculateTotalAbsoluteError(yhat, y_test)/len(yhat)))
    print("Test data mean accuracy SVM score: %f"%clfs.score(X_test, y_test))
    f1_c0 = f1_score(y_test, yhat, pos_label=1, average='binary')
    #print("Test data f1 score for class -1: %f"%(f1_c0))
    print("Test data f1 score for class +1: %f" % (f1_c0))
    print("Done!")
Example #2
class Clf(object):
    SVC_RBF = SVC(kernel='rbf', class_weight=None, random_state=0)
    SVC_RBF_CW = SVC(kernel='rbf', class_weight='balanced', random_state=0)  # class_weight='auto' was renamed 'balanced'
    LINEAR_L1 = LinearSVC(loss='hinge', random_state=0, class_weight=None)  # loss='l1' was renamed 'hinge'
    LINEAR_L1_CW = LinearSVC(loss='hinge', random_state=0, class_weight='balanced')
    LINEAR_SVC = SVC(kernel='linear', random_state=0, class_weight='balanced')
    TREE = DecisionTreeClassifier(random_state=0)
    RF = RandomForestClassifier(random_state=0)
    MAJORITY = DummyClassifier(strategy='most_frequent')
    RANDOM = DummyClassifier(strategy='stratified')
    ADABOOST = AdaBoostClassifier(random_state=0)
    LR = LogisticRegression()
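Since the Clf attributes are plain estimator instances, a consumer can clone them so each run stays independent. A usage sketch (X and y are assumed to be loaded already):

from sklearn.base import clone
from sklearn.model_selection import cross_val_score

for name in ('SVC_RBF', 'TREE', 'RF', 'MAJORITY'):
    est = clone(getattr(Clf, name))  # fresh, unfitted copy of the registry entry
    scores = cross_val_score(est, X, y, cv=5)
    print(name, scores.mean())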
Example #3
    def fit(self, data, args):
        self.model = SVC(probability=True)

        with Timer() as t:
            self.model.fit(data.X_train, data.y_train)

        return t.interval
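Timer is not defined in the snippet; the only interface the code needs is a context manager exposing an .interval attribute. A minimal stand-in (an assumption, not the project's actual class):

import time

class Timer(object):
    def __enter__(self):
        self.start = time.perf_counter()
        return self

    def __exit__(self, *exc):
        # Elapsed wall-clock time of the with-block, in seconds.
        self.interval = time.perf_counter() - self.start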
Example #4
def train_ensemble_classifier():
    # classifier2 = SklearnClassifier(GaussianNB(), sparse=False)
    # classifier1 = SklearnClassifier(SVC(), sparse=False)
    # classifier3 = SklearnClassifier(RandomForestClassifier(), sparse=False)
    # classifier4 = SklearnClassifier(DecisionTreeClassifier(), sparse=False)
    classifier2 = SklearnClassifier(GaussianNB(), sparse=False)
    classifier1 = SklearnClassifier(SVC(degree=18, C=12), sparse=False)  # degree only affects the 'poly' kernel; SVC defaults to 'rbf', so it is ignored here
    classifier3 = SklearnClassifier(RandomForestClassifier(max_depth=100,
                                                           n_estimators=10),
                                    sparse=False)
    classifier4 = SklearnClassifier(DecisionTreeClassifier(min_samples_split=2,
                                                           min_samples_leaf=2,
                                                           max_leaf_nodes=30,
                                                           splitter='best',
                                                           random_state=0),
                                    sparse=False)
    test_classifiers = [classifier1, classifier2, classifier3, classifier4]

    trained_classifiers = []

    for classifier in test_classifiers:
        classifier = classifier.train(train_features)
        trained_classifiers.append(classifier)

    voted_classifier = VoteClassifier(trained_classifiers)
    save_classifier(voted_classifier, 'voted_classifier.pickle')

    print_and_get_split_dataset_accuracy(test_classifiers, train_features)
    print_voted_classifier_cross_validation_experiment_result(
        test_classifiers, train_features)
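VoteClassifier is project-local (the pattern is common in NLTK tutorials); a minimal majority-vote wrapper with the interface used above could look like this sketch (an assumption, not the repository's actual class):

from collections import Counter

class VoteClassifier(object):
    def __init__(self, classifiers):
        self._classifiers = classifiers

    def classify(self, features):
        # Majority vote over the wrapped NLTK-style classifiers.
        votes = [c.classify(features) for c in self._classifiers]
        return Counter(votes).most_common(1)[0][0]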
Example #5
def learnPhase():
    if os.path.isfile("Doc2VecSVMNauceni.pkl"):
        return None
    tablecolrow = loadData("train.csv")
    tablecolrow[3] = FilterQuestions(tablecolrow[3])
    tablecolrow[4] = FilterQuestions(tablecolrow[4])

    model = prepareDoc2Vec(tablecolrow[3], tablecolrow[4])

    for i in range(len(tablecolrow[3])):
        tablecolrow[3][i] = model.infer_vector(tablecolrow[3][i].split(" "))
        tablecolrow[4][i] = model.infer_vector(tablecolrow[4][i].split(" "))

    traindataX = [None] * len(tablecolrow[3])
    traindataY = [None] * len(tablecolrow[3])
    for i in range(len(traindataX)):
        traindataX[i] = tablecolrow[3][i] + tablecolrow[4][i]  # numpy arrays: element-wise sum of the two doc vectors, not concatenation
        traindataY[i] = int(tablecolrow[5][i])

    svmKlasifikator = SVC(kernel='rbf',
                          verbose=True,
                          probability=True,
                          max_iter=1000000)
    print("Learning started")
    tmStart = timer()
    svmKlasifikator.fit(traindataX, traindataY)
    tmEnd = timer()
    print("Predicting lasted", tmEnd - tmStart)
    joblib.dump(svmKlasifikator, 'Doc2VecSVMNauceni.pkl')
    print("Spremljen je napredak ucenja")
Example #6
    def __init__(self, *,
                 hyperparams: Hyperparams,
                 random_seed: int = 0,
                 docker_containers: Dict[str, str] = None,
                 _verbose: int = 0) -> None:

        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

        self._clf = SVC(
            C=self.hyperparams['C'],
            kernel=self.hyperparams['kernel'],
            degree=self.hyperparams['degree'],
            gamma=self.hyperparams['gamma'],
            coef0=self.hyperparams['coef0'],
            probability=self.hyperparams['probability'],
            shrinking=self.hyperparams['shrinking'],
            tol=self.hyperparams['tol'],
            class_weight=self.hyperparams['class_weight'],
            max_iter=self.hyperparams['max_iter'],
            decision_function_shape=self.hyperparams['decision_function_shape'],
            verbose=_verbose,
            random_state=self.random_seed,
        )
        self._training_inputs = None
        self._training_outputs = None
        self._fitted = False
Example #7
def learnModel(data):
    if os.path.isfile("BagOfWordsSVMNauceni.pkl"):
        return None
    data[0] = FilterQuestions(data[0])
    data[1] = FilterQuestions(data[1])
    # Initialize the "CountVectorizer" object, which is scikit-learn's
    # bag of words tool.  
    vectorizer = CountVectorizer(analyzer="word",
                                 tokenizer=None,
                                 preprocessor=None,
                                 stop_words=None,
                                 max_features=20000)
    
    allQuestions = data[0] + data[1]

    vectorizer.fit(allQuestions)    
    joblib.dump(vectorizer, 'BagOfWordsVectorizerNauceni.pkl') 
    
    znacajkePitanja = [vectorizer.transform(data[0]), vectorizer.transform(data[1])]
    for i, r in enumerate(data[2]):
        data[2][i] = int(r)
        
    znacajkePitanja = hstack(znacajkePitanja).tocsr()
    svmKlasifikator = SVC(kernel='rbf', verbose=True, probability=True, max_iter=1000000)
   
    print("Learning started")
    tmStart = timer()
    svmKlasifikator.fit(znacajkePitanja, data[2])
    tmEnd = timer()
    print("Learning ended")
    print("Learning lasted", tmEnd - tmStart)
    
    joblib.dump(svmKlasifikator, 'BagOfWordsSVMNauceni.pkl') 
    print("Spremljen je napredak ucenja")
Example #8
def svm_train(X, y, model_path):
    model = SVC()
    model.fit(X, y)
    expected = y
    predicted = model.predict(X)
    print(metrics.classification_report(expected, predicted))
    print(metrics.confusion_matrix(expected, predicted))
    joblib.dump(model, model_path)
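Note that the printed classification report and confusion matrix are computed on the training data itself, so they say nothing about generalization. A quick end-to-end exercise of the function (synthetic data; the file name is illustrative):

from sklearn.datasets import make_classification
import joblib

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
svm_train(X, y, "svm_demo.joblib")
reloaded = joblib.load("svm_demo.joblib")
print(reloaded.predict(X[:5]))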
Example #9
def train_svm(params, suffix, train_X, train_Y, test_X, test_Y):
    C = params['C']
    kernel = params['kernel']
    model = SVC(gamma='scale', probability=True, C=C, kernel=kernel)
    print("Params C:", C, "kernel:", kernel)
    model.fit(train_X, train_Y)
    print("Train score", model.score(train_X, train_Y))
    test_score = model.score(test_X, test_Y)
    print("Test score", test_score)
    return test_score, None
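Because train_svm takes its hyperparameters as a dict, a small manual grid search is just a loop (the grid values are illustrative; train_X, train_Y, test_X, test_Y are assumed to exist):

best_params, best_score = None, -1.0
for C in (0.1, 1.0, 10.0):
    for kernel in ('linear', 'rbf'):
        score, _ = train_svm({'C': C, 'kernel': kernel}, 'demo',
                             train_X, train_Y, test_X, test_Y)
        if score > best_score:
            best_params, best_score = {'C': C, 'kernel': kernel}, score
print("Best:", best_params, best_score)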
Example #10
 def train_all(self, g):
     X = np.concatenate([self.train_X, self.val_X], axis=0)
     if self.use_scale:
         self.scale.fit(X)
         X = self.scale.transform(X)
     for i in range(3):
         y = np.concatenate([self.train_y, self.val_y], axis=0)
         # One-vs-rest relabeling: class i+1 becomes 1, every other class 0.
         y[y != i + 1] = 0
         y[y != 0] = 1
         clf = SVC()
         clf.set_params(**g)
         self.model_a.append(clf.fit(X, y))
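The two in-place masks implement one-vs-rest by hand for three classes. scikit-learn can do the same wrapping automatically; a sketch of the equivalent (assuming the original labels are 1, 2 and 3):

from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import SVC

ovr = OneVsRestClassifier(SVC().set_params(**g))
ovr.fit(X, y_multiclass)  # y_multiclass: the original 3-class labels; fits one binary SVC per class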
Example #11
 def test_kernel_sigmoid(self):
     clf = SVC(C=1., kernel='sigmoid', gamma=0.001, random_state=0)
     self.set_classifier(clf)
     java_preds, py_preds = [], []
     min_vals = np.amin(self.X, axis=0)
     max_vals = np.amax(self.X, axis=0)
     for n in range(self.N_RANDOM_TESTS):
         x = [random.uniform(min_vals[f], max_vals[f]) for f in
              range(self.n_features)]
         java_preds.append(self.make_pred_in_js(x))
         py_preds.append(self.make_pred_in_py(x))
     self.assertListEqual(py_preds, java_preds)
Example #12
 def test_kernel_sigmoid(self):
     clf = SVC(C=1., kernel='sigmoid', gamma=0.001, random_state=0)
     self._port_model(clf)
     Y, Y_py = [], []
     min_vals = np.amin(self.X, axis=0)
     max_vals = np.amax(self.X, axis=0)
     for n in range(self.n_random_tests):
         x = [random.uniform(min_vals[f], max_vals[f]) for f in
              range(self.n_features)]
         Y.append(self.make_pred_in_custom(x))
         Y_py.append(self.make_pred_in_py(x))
     self.assertListEqual(Y, Y_py)
Example #13
 def test_pipeline_estimator(self):
     self.X, self.y = samples_generator.make_classification(
         n_informative=5, n_redundant=0, random_state=42)
     anova_filter = SelectKBest(f_regression, k=5)
     self.mdl = Pipeline([('anova', anova_filter), ('svc', SVC(kernel='linear'))])
     self.mdl.set_params(anova__k=10, svc__C=.1)
     try:
         self._port_model()
     except Exception as e:
         self.fail('Unexpected exception raised: {}'.format(e))  # e.message is Python 2 only
     finally:
         self._clear_model()
Example #14
 def train(self, g):
     self.model = []
     X = self.train_X.copy()
     if self.use_scale:
         self.scale.fit(X)
         X = self.scale.transform(X)
     for i in range(3):
         y = self.train_y.copy()
         # One-vs-rest relabeling, as in train_all above.
         y[y != i + 1] = 0
         y[y != 0] = 1
         clf = SVC()
         clf.set_params(**g)
         self.model.append(clf.fit(X, y))
Example #15
    def SVCClassify(self, x_train, y_train):
        '''
        Basic Support Vector Machine Classifier
        '''

        # the kernel is configurable
        kernel = 'rbf'
        # init the classifier and train it;
        # probability=True is required if predict_proba output is needed later
        clf = SVC(kernel=kernel, probability=True)
        clf.fit(x_train, y_train)

        return clf
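Because the classifier is constructed with probability=True, the returned object supports class probabilities as well as hard labels. Typical follow-up from within the same class (x_test is assumed):

clf = self.SVCClassify(x_train, y_train)
labels = clf.predict(x_test)
proba = clf.predict_proba(x_test)  # per-class probability estimates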
Example #16
 def test_sigmoid_kernel(self):
     self.mdl = SVC(C=1., kernel='sigmoid', gamma=0.001, random_state=0)
     self.load_iris_data()
     self._port_model()
     amin = np.amin(self.X, axis=0)
     amax = np.amax(self.X, axis=0)
     preds, ground_truth = [], []
     for _ in range(self.N_RANDOM_FEATURE_SETS):
         x = np.random.uniform(amin, amax, self.n_features)
         preds.append(self.pred_in_custom(x))
         ground_truth.append(self.pred_in_py(x))
     self._clear_model()
     # noinspection PyUnresolvedReferences
     self.assertListEqual(preds, ground_truth)
Example #17
 def test_kernel_poly(self):
     clf = SVC(C=1., kernel='poly', gamma=0.001, random_state=0)
     self._port_model(clf)
     java_preds, py_preds = [], []
     min_vals = np.amin(self.X, axis=0)
     max_vals = np.amax(self.X, axis=0)
     for n in range(self.n_random_tests):
         x = [
             random.uniform(min_vals[f], max_vals[f])
             for f in range(self.n_features)
         ]
         java_preds.append(self.make_pred_in_js(x))
         py_preds.append(self.make_pred_in_py(x))
     self.assertListEqual(py_preds, java_preds)
Example #18
 def test_auto_gamma(self):
     self.estimator = SVC(C=1., gamma='auto', random_state=0)
     self.load_iris_data()
     self._port_estimator()
     amin = np.amin(self.X, axis=0)
     amax = np.amax(self.X, axis=0)
     preds, ground_truth = [], []
     for _ in range(self.N_RANDOM_FEATURE_SETS):
         x = np.random.uniform(amin, amax, self.n_features)
         preds.append(self.pred_in_custom(x))
         ground_truth.append(self.pred_in_py(x))
     self._clear_estimator()
     # noinspection PyUnresolvedReferences
     self.assertListEqual(preds, ground_truth)
Example #19
def cross_validate(samples, labels, outputDir):
    '''
    Function to perform K-fold cross validation
    '''
    # K(=10) FOLD CROSS VALIDATION
    K = 10
    fold_samples, fold_labels = cv_split(samples, np.array(labels), K)
    log_loss = [['Log Loss'],[]]
    total_ll = 0.0
    for fold in range(K):
        samples_chunk = fold_samples[:fold] + fold_samples[fold+1:]
        labels_chunk = fold_labels[:fold] + fold_labels[fold+1:]
    
        #Training L1 logistic regression
        logRegrL1 = linear_model.LogisticRegression(C=1, penalty='l1')
        logRegrL1.fit( np.concatenate(samples_chunk, axis=0), np.concatenate(labels_chunk, axis = 0) )
    
        #Training SVM with linear kernel
        svmLin = SVC(kernel='linear', probability=True)
        svmLin.fit( np.concatenate(samples_chunk, axis=0), np.concatenate(labels_chunk, axis = 0) )
    
        #Training Random Forest Classifier
        rfc = RandomForestClassifier(n_estimators=100)
        rfc.fit( np.concatenate(samples_chunk, axis=0), np.concatenate(labels_chunk, axis = 0) )
    
        #TEST ON CROSS VALIDATION HOLD OUT SET
        val = [i for i in range(len(fold_labels[fold]))]
        id = 0
        for item in fold_samples[fold]:
            # predict_proba expects a 2D array, so wrap the single sample;
            # the first column is P(class 0), the second P(class 1)
            predictionL1 = logRegrL1.predict_proba([item])
            predictionSvmLin = svmLin.predict_proba([item])
            predictionRfc = rfc.predict_proba([item])

            # Average the three models' probabilities as the final health-status prediction
            val[id] = (predictionL1[0][1] + predictionSvmLin[0][1] + predictionRfc[0][1]) / 3.0
            id = id + 1
    
        
        for i in range(len(fold_labels[fold])):
            total_ll += logloss(fold_labels[fold][i], val[i])
    
    
    log_loss[1] = total_ll/len(samples)
    #Save csv file in the output directory with name Dota2Val.csv
    np.savetxt(os.path.join(outputDir, "Dota2Val.csv"),
               log_loss,
               delimiter=',',
               fmt='%s')
Example #20
def train_and_predict(samples, labels, feature_selector, inputDir, outputDir):
    #Training L1 logistic regression
    logRegrL1 = linear_model.LogisticRegression(C=1, penalty='l1')
    logRegrL1.fit(samples, labels)

    #Training SVM with linear kernel
    svmLin = SVC(kernel='linear', probability=True)
    svmLin.fit(samples, labels)

    #Training Random Forest Classifier
    rfc = RandomForestClassifier(n_estimators=100)
    rfc.fit(samples, labels)

    #test set
    testDir = inputDir + "/set_test"
    testFiles = sorted([
        join(testDir, f) for f in listdir(testDir) if isfile(join(testDir, f))
    ],
                       key=numericalSort)

    #Read feature vectors of test images
    testSamples = cubeVoxelsVar(testFiles)
    testSamples = feature_selector.transform(testSamples)
    print(len(testSamples))

    #2D array to report final prediction in format (ID,Prediction)
    final = [[0 for j in range(2)] for i in range(139)]  # one header row plus 138 prediction rows
    final[0][0] = 'ID'
    final[0][1] = 'Prediction'
    id = 1

    #Predict health status of test image using each of the 3 models trained above
    for item in testSamples:
        # predict_proba expects a 2D array, so wrap the single sample;
        # the first column is P(class 0), the second P(class 1)
        predictionL1 = logRegrL1.predict_proba([item])
        predictionSvmLin = svmLin.predict_proba([item])
        predictionRfc = rfc.predict_proba([item])

        final[id][0] = id
        #Taking the average of each of the model predictions as final health status prediction
        final[id][1] = (predictionL1[0][1] + predictionSvmLin[0][1] +
                        predictionRfc[0][1]) / 3.0
        id = id + 1

    #Save csv file in the output directory with name final_sub.csv
    np.savetxt(outputDir + "/final_sub.csv", final, delimiter=',', fmt='%s')
Example #21
def train_cv_clf(topics_train,
                 classes_train,
                 features,
                 n_folds=10,
                 param_grid=_PARAM_GRID,
                 tuned_clf=SVC(C=1, kernel='linear'),
                 scoring=util.weighted_f1,
                 random_state=0):
    """Trains the topic type classifier, given the various parameters.
    
    """
    # cross_validation.KFold is the pre-0.18 scikit-learn API;
    # newer releases use model_selection.KFold(n_splits=...)
    kf = cross_validation.KFold(len(topics_train),
                                n_folds=n_folds,
                                random_state=random_state)
    cv_clf = GridSearchCV(estimator=tuned_clf,
                          param_grid=param_grid,
                          cv=kf,
                          scoring=scoring)
    topic_vectors_train = to_features(features, topics_train)
    cv_clf.fit(topic_vectors_train, classes_train)
    return cv_clf
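After fitting, GridSearchCV exposes the winning configuration and, with refit (the default), a retrained estimator. A typical follow-up (attribute names are standard scikit-learn; topics_test is assumed):

cv_clf = train_cv_clf(topics_train, classes_train, features)
print(cv_clf.best_params_, cv_clf.best_score_)
predictions = cv_clf.predict(to_features(features, topics_test))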
Example #22
def learnModel(train):

    data = []
    for duplicate in train["is_duplicate"]:
        data.append(int(duplicate))

    znacajkePitanja = get_avg(train)
    svmKlasifikator = SVC(kernel='rbf',
                          verbose=True,
                          probability=True,
                          max_iter=10000)

    print("Learning started")
    tmStart = timer()
    svmKlasifikator.fit(znacajkePitanja, data)
    tmEnd = timer()
    print("Learning ended")
    print("Learning lasted", tmEnd - tmStart)

    joblib.dump(svmKlasifikator, 'Word2VecSVMNauceni.pkl')
    print("Spremljen je napredak ucenja")
Example #23
def classifier_panchenko2016(X_train,
                             y_train,
                             X_test,
                             y_test,
                             separateClassifier=False):
    train_or_test_labels = ["train"
                            for i in y_train] + ["test" for i in y_test]
    y_train, X_train, y_test, X_test = outlier_removal(train_or_test_labels,
                                                       X_train + X_test,
                                                       y_train + y_test)

    y_train, X_train = features_extraction(
        y_train,
        X_train,
        separateClassifier=separateClassifier,
        featuresCount=100)

    y_test, X_test = features_extraction(y_test,
                                         X_test,
                                         separateClassifier=separateClassifier,
                                         featuresCount=100)

    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    classifier = SVC(kernel="rbf",
                     C=2e11,
                     gamma=2e-1,
                     max_iter=5000,
                     class_weight="balanced",
                     verbose=1)

    print("fitting")
    classifier.fit(X_train, y_train)

    print("testing")
    y_predictions = classifier.predict(X_test)

    return y_test, y_predictions
Example #24
def featureSelector(data, trainHeaderList, target, selectorType):
    dataFrame = pd.DataFrame(data)
    if(selectorType == 'VT'):
        cols = dataFrame.columns
        pi = 0.6
        selector = VarianceThreshold(threshold=(pi*(1-pi)))
        values = selector.fit_transform(dataFrame)
        labels = list()
        i = 0
        for x in selector.get_support(indices=False):
            if x:
                labels.append(trainHeaderList[i])
            i += 1
        return pd.DataFrame(values , columns=labels)

    elif(selectorType == 'KB'):
        selector = SelectKBest(chi2, k=6)
        values = selector.fit_transform(dataFrame, target)
        labels = list()
        i = 0
        for x in selector.get_support(indices=False):
            if x:
                labels.append(trainHeaderList[i])
            i += 1
        return pd.DataFrame(values, columns=labels)

    elif(selectorType == 'SVC'):
        svc = SVC(kernel="linear", C=1)
        selector = RFE(estimator=svc, n_features_to_select=20, step=0.5, verbose=5)
        values = selector.fit_transform(dataFrame, target)
        labels = list()
        i = 0
        for x in selector.get_support(indices=False):
            if x:
                labels.append(trainHeaderList[i])
            i += 1
        return pd.DataFrame(values, columns=labels)
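Typical usage, selecting the strategy by its tag (arguments are illustrative: data is the feature matrix, trainHeaderList its column names, target the label vector):

selected = featureSelector(data, trainHeaderList, target, 'KB')
print(selected.columns.tolist())  # the six columns kept by SelectKBest(chi2)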
Example #25
            if Y_label != 'NULL' or random.random() > 0:  # random.random() > 0 is effectively always True, so no rows are skipped
                if Y_label == event_name:
                    Y = 1
                else:
                    Y = 0

                if i == 0:
                    X_all = X
                    Y_all = Y
                    i = 1
                else:
                    X_all = np.vstack((X_all, X))
                    Y_all = np.append(Y_all, Y)
                    i += 1
        # print (i)
    # print (np.sum(X_all, axis = 1))
    # print(X_all, Y_all)

    clf = SVC(kernel=chi2_kernel)
    # clf = SVC()
    clf.fit(X_all, Y_all)

    print(clf.score(X_all, Y_all))
    print(clf.predict(X_all))

    fread.close()

    cPickle.dump(clf, open(output_file, "wb"))

    print('SVM trained successfully for event %s!' % event_name)
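chi2_kernel comes from sklearn.metrics.pairwise and suits non-negative histogram-like features (e.g. bag-of-visual-words counts). A minimal standalone use of a callable kernel with SVC (synthetic non-negative data):

import numpy as np
from sklearn.metrics.pairwise import chi2_kernel
from sklearn.svm import SVC

X = np.abs(np.random.randn(20, 5))  # the chi-squared kernel requires non-negative inputs
y = np.random.randint(0, 2, 20)
clf = SVC(kernel=chi2_kernel).fit(X, y)
print(clf.score(X, y))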
Example #26
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42)

#:# preprocessing

transform_pipeline = Pipeline([('scaler', StandardScaler())])

X_train = pd.DataFrame(transform_pipeline.fit_transform(X_train),
                       columns=X_train.columns)

#:# model

params = {'gamma': 5, 'kernel': 'sigmoid', 'probability': True}

classifier = SVC(**params)
classifier.fit(X_train, y_train)

#:# hash
#:# aad366f6d5961bc98783c2ad9fb3918d
md5 = hashlib.md5(str(classifier).encode('utf-8')).hexdigest()
print(f'md5: {md5}')

#:# audit
y_pred = classifier.predict(transform_pipeline.transform(X_test))
y_pred_proba = classifier.predict_proba(
    transform_pipeline.transform(X_test))[:, 1]

tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

print(f'acc: {accuracy_score(y_test, y_pred)}')
        audio_name=line.split(" ")[0]
        count=count+1
        label=line.split(" ")[1].split("\n")[0]
        if "imtraj" in feat_dir: 
            feat_vec=import_imtraj_txt(feat_dir+audio_name+".spbof")
        else:
            feat_vec=np.genfromtxt(feat_dir+audio_name,delimiter=";")
        if (label==event_name):
            label=1
            pos_count+=1
        else:
            label=0
            neg_count+=1
        if len(X)==0:
            X=[feat_vec]
        else:
            X=np.append(X,[feat_vec],axis=0)
        Y=Y+[label]
    
    print "Data loading finished positive "+str(pos_count)+" negative "+str(neg_count)
    #pipe_lrSVC=SVC(C=10,gamma=0.0001,probability=True)
    pipe_lrSVC=SVC(probability=True)
    #svm=LinearSVC(C=10)
    #pipe_lrSVC=CalibratedClassifierCV(svm)
    # note: preprocessing.scale standardizes X on the fly; a fitted StandardScaler
    # would need to be saved alongside the model to scale test data the same way
    pipe_lrSVC.fit(preprocessing.scale(X), Y)
    pickle.dump(pipe_lrSVC,open(output_file+'.pickle','wb'))
    print('SVM trained successfully for event %s!' % event_name + ' round num %s' % round_num)
Example #28
 def setUp(self):
     super(SVCCTest, self).setUp()
     self.mdl = SVC(C=1., kernel='rbf', gamma=0.001, random_state=0)
Example #29
    ('extender', AttributesExtension()),
    ('imputer', SimpleImputer(strategy="mean")),
])
learning_data = pipeline.fit_transform(features_data)

# ### Select a model

# In[ ]:

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC  # sklearn.svm.classes was a private path, removed in newer releases
from sklearn.metrics import accuracy_score

svc = SVC()
log_reg = LogisticRegression()
#log_reg.fit(learning_data, labels)
rand_for = RandomForestClassifier()
#rand_for.fit(learning_data, labels)

models = {
    "Logistic Regression": log_reg,
    "Random Forest": rand_for,
    "SVM": svc,
}

for model in models.keys():
    scores = cross_val_score(models[model],
                             learning_data,
                             labels,
Example #30
from sklearn.svm import SVC  # sklearn.svm.classes was a private path, removed in newer releases
import cPickle
import numpy
import os
import sys
import time

# Trains an SVM on k-means features and saves the model to a local file

if __name__ == '__main__':
    t1 = time.time()
    event_name = "P003"
    feat_dir = "kmeans/"
    feat_dim = 50
    output_file = "mfcc_pred/svm.%s.model" % event_name

    fread = open("list/train", "r")
    clf = SVC(probability=True)
    X, Y = [], []
    for i in fread.readlines():
        i = i.split(" ")
        line = i[0]
        label = i[1].replace('\n', '')
        kmeans_path = "kmeans/" + line + ".kmeans.txt"
        if os.path.exists(kmeans_path):
            kmeans_feat = numpy.genfromtxt(kmeans_path, delimiter=";")
        else:
            kmeans_feat = numpy.zeros(feat_dim)
            label = "NULL"
        if label != event_name:
            label = "NULL"
        X.append(kmeans_feat)
        Y.append(label)