Python SVC.C示例，sklearn.svm.SVC.C Python示例

示例#1

0

显示文件

文件： Experimeter_AS3.py 项目： mindaika/PyML

def parameter_search():
    """Sweep the penalty C of a linear SVC and print one table row per value.

    C takes the values 10, 20, ..., 90.  For every value the yeast data
    (``X_yeast`` / ``Y_yeast``) is re-split with 65% of the samples used for
    training, the model is fitted, and the accuracy on the held-out part is
    printed together with the fit time and the predict-plus-score time.
    """
    row = "{0:25}{1:15}{2:15}{3:15}{4:15}"
    header = ("\nClassifier", "Accuracy(%)", "Runtime(s)", "i * scalar",
              "Predict(s)")
    print(row.format(*header))

    scalar = 10
    for step in range(1, 10):
        c_value = step * scalar

        X_train, X_test, Y_train, Y_test = train_test_split(
            X_yeast, Y_yeast, train_size=0.65)

        model = SVC(kernel='linear')
        model.C = c_value

        fit_began = time.time()
        model.fit(X_train, Y_train)
        fit_ended = time.time()

        # The prediction timer covers both predict() and the accuracy scoring.
        predict_began = time.time()
        accuracy = accuracy_score(Y_test, model.predict(X_test))
        predict_ended = time.time()

        cells = (
            'Test value=' + str(c_value),
            "%.2f" % (accuracy * 100),
            "%.2f" % (fit_ended - fit_began),
            str(c_value),
            "%.2f" % (predict_ended - predict_began),
        )
        print(row.format(*cells))

示例#2

0

显示文件

文件： MySVM.py 项目： jp1989326/Machine_learning_for_reliability_analysis

    def trainauc(self, train, trainlabel, seed, Cmin, Cmax, numC, rmin, rmax,
                 numr, degree=3, method='roc_auc', rad_stat=2):
        """Grid-search C and gamma for an SVC using 10-fold cross-validation.

        C takes ``numC`` base-2 log-spaced values between 2**Cmin and 2**Cmax,
        gamma takes ``numr`` values between 2**rmin and 2**rmax.  ``seed`` is
        passed to SVC as its ``kernel`` argument, ``method`` is the scoring
        name given to ``cross_val_score`` and ``rad_stat`` becomes the
        estimator's ``random_state``.

        Returns a DataFrame with a ``gamma_range`` column plus one column per
        C value holding the mean cross-validation score for each gamma.
        """
        c_values = np.logspace(Cmin, Cmax, num=numC, base=2, endpoint=True)
        gamma_values = np.logspace(rmin, rmax, num=numr, base=2, endpoint=True)

        model = SVC(kernel=seed)
        results = DataFrame({'gamma_range': gamma_values})

        for finished, c_value in enumerate(c_values, start=1):
            column = []
            for gamma_value in gamma_values:
                model.C = c_value
                model.gamma = gamma_value
                model.degree = degree
                model.random_state = rad_stat
                fold_scores = cross_val_score(model, train, trainlabel,
                                              scoring=method, cv=10,
                                              n_jobs=-1)
                column.append(np.mean(fold_scores))

            # Progress report: average over all gammas for this C.
            print(np.mean(column))
            print("%r cycle finished, %r left" % (finished, numC - finished))
            results[c_value] = column

        return results

示例#3

0

显示文件

    def __tune_on_kfolds(self, k_folds, sample_vector, targets, c_val):
        """Return the k-fold cross-validated accuracy of a linear SVC with C=c_val.

        ``sample_vector`` and ``targets`` are split into ``k_folds`` aligned
        chunks with ``np.array_split``; each chunk is held out once while the
        model is trained on the remaining ones.  The result is the number of
        correct held-out predictions divided by ``len(sample_vector)``.
        """
        print("Tuning on K-Folds...")
        classifier = SVC(kernel='linear', random_state=1)
        classifier.C = c_val

        # Partition samples and targets into k aligned folds.
        sample_folds = np.array_split(sample_vector, k_folds)
        target_folds = np.array_split(targets, k_folds)

        hits = 0
        for fold in range(k_folds):
            print("Fold " + str(fold + 1))

            held_out_samples = sample_folds[fold]
            # Training data is every fold except the held-out one.
            train_samples = np.concatenate(
                sample_folds[:fold] + sample_folds[fold + 1:], axis=0)

            held_out_targets = target_folds[fold]
            train_targets = np.concatenate(
                target_folds[:fold] + target_folds[fold + 1:], axis=0)

            classifier.fit(train_samples, train_targets)
            predicted = classifier.predict(held_out_samples)

            hits += sum(1 for idx in range(len(predicted))
                        if predicted[idx] == held_out_targets[idx])

        # Accuracy over all samples.
        return hits / len(sample_vector)

示例#4

0

显示文件

def select_param_rbf(X, y, kf, metric="accuracy"):
    """
    Sweeps different settings for the hyperparameters of an RBF-kernel SVM,
    calculating the k-fold CV performance for each setting, then selecting the
    hyperparameters that 'maximize' the average k-fold CV performance.

    The candidate grid is C in 10**-3 .. 10**2 (6 values) and gamma in
    10**-9 .. 10**3 (13 values).  When several settings tie for the best
    score, the first one in row-major (C, then gamma) order is returned.

    Parameters
    --------------------
        X       -- numpy array of shape (n,d), feature vectors
                     n = number of examples
                     d = number of features
        y       -- numpy array of shape (n,), binary labels {1,-1}
        kf      -- model_selection.KFold or model_selection.StratifiedKFold
        metric  -- string, option used to select performance measure

    Returns
    --------------------
        C        -- float, optimal parameter value for an RBF-kernel SVM
        gamma    -- float, optimal parameter value for an RBF-kernel SVM
    """

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('RBF SVM Hyperparameter Selection based on ' + str(metric) + ':')

    # part 3b: create grid, then select optimal hyperparameters using cross-validation
    C_range = 10.0**np.arange(-3, 3)
    gamma_range = np.logspace(-9, 3, 13)

    # grid[i][j] holds the mean CV performance for (C_range[i], gamma_range[j]).
    grid = np.zeros((len(C_range), len(gamma_range)))

    for i, curr_c in enumerate(C_range):
        for j, gamma in enumerate(gamma_range):
            perf_total = 0.0

            for train_index, test_index in kf.split(X, y):
                X_train, X_test = X[train_index], X[test_index]
                y_train, y_test = y[train_index], y[test_index]
                model = SVC(kernel='rbf', gamma=gamma)
                model.C = curr_c
                model.fit(X_train, y_train)
                # Scores (not hard labels) are handed to the metric helper.
                predictions = model.decision_function(X_test)
                perf_total += performance(y_test, predictions, metric)

            grid[i][j] = perf_total / kf.n_splits

    # np.argmax works on the FLATTENED grid; divmod maps the flat position back
    # to (row, column).  Integer arithmetic is required here: true division
    # would yield a float, which is not a valid index.
    c_index, gamma_index = divmod(int(np.argmax(grid)), len(gamma_range))

    return C_range[c_index], gamma_range[gamma_index]

示例#5

0

显示文件

文件： utils.py 项目： clundstrom/ml_supervised_learning

def pickParams(svc: SVC, gamma, c):
    """
    Aids gridsearch in choosing parameters for SVM.

    The estimator is modified in place and the same object is returned.
    Which hyper-parameters are written depends on ``svc.kernel``:

    * ``'linear'`` -- ``C``
    * ``'poly'``   -- ``C`` and ``degree`` (taken from ``gamma``)
    * ``'rbf'``    -- ``C`` and ``gamma``

    Any other kernel leaves the estimator untouched.

    :param svc: SVM
    :param gamma: kernel parameter: gamma for the rbf kernel, polynomial
        degree for the poly kernel; ignored for the linear kernel
    :param c: value assigned to the penalty parameter C
    :return: tuned SVM
    """
    if svc.kernel == 'linear':
        svc.C = c
    elif svc.kernel == 'poly':
        svc.C = c
        # SVC has no ``D`` hyper-parameter; the polynomial degree is called
        # ``degree``.  Writing to ``svc.D`` only created an attribute nothing
        # reads, so the degree was never actually tuned.
        svc.degree = gamma
    elif svc.kernel == 'rbf':
        svc.C = c
        svc.gamma = gamma
    return svc

示例#6

0

显示文件

文件： main.py 项目： yoraml/microbiome

def get_model(PARAMS):
    '''Get model according to parameters

    Builds an SVC and copies C, kernel, degree, gamma and coef0 from PARAMS
    onto it.  The kernel is looked up under ``'kernel'`` first and under the
    legacy misspelled key ``'keral'`` second, so existing parameter
    dictionaries keep working.  When neither key is present the estimator's
    own default kernel is left in place.
    '''
    model = SVC()
    model.C = PARAMS.get('C')

    # The kernel used to be written to ``model.keral``, an attribute SVC never
    # reads, so the requested kernel was silently ignored.
    kernel = PARAMS.get('kernel', PARAMS.get('keral'))
    if kernel is not None:
        model.kernel = kernel

    model.degree = PARAMS.get('degree')
    model.gamma = PARAMS.get('gamma')
    model.coef0 = PARAMS.get('coef0')

    return model

示例#7

0

显示文件

文件： epi_prediction.py 项目： bonilhamusclab-projects/epi_prediction

 def new_pipe(mod):
     """Assemble the classification pipeline for modality ``mod``.

     The stages are: column selection (``index_map[mod]``), a masker built
     with ``SimpleMaskerPipeline(.2)``, selection of the 500 best features
     with ``SelectKBest`` and a linear SVC with probability estimates enabled
     whose C comes from ``params_map[mod]['C']``.
     """
     classifier = SVC(kernel='linear',
                      C=params_map[mod]['C'],
                      probability=True)
     white_matter_masker = SimpleMaskerPipeline(.2)

     stages = [
         ('columns', ColumnSelector(index_map[mod])),
         ('whitematter', white_matter_masker),
         ('anova', SelectKBest(k=500)),
         ('svc', classifier),
     ]
     return Pipeline(stages)

示例#8

0

显示文件

文件： MySVM.py 项目： falcong/Machine_learning_for_reliability_analysis

    def trainSVC(self,
                 train,
                 trainlabel,
                 seed,
                 Cmin,
                 Cmax,
                 numC,
                 rmin,
                 rmax,
                 numr,
                 degree=3):
        """Grid-search C and gamma for an SVC with a custom CV scorer.

        C takes ``numC`` base-2 log-spaced values between 2**Cmin and 2**Cmax,
        gamma takes ``numr`` values between 2**rmin and 2**rmax.  ``seed`` is
        passed to SVC as its ``kernel`` argument.  Each (C, gamma) pair is
        evaluated with 10-fold ``cross_val_score`` using
        ``training_manCV().metric_scores`` as the scorer.

        Returns a DataFrame with a ``gamma_range`` column plus one column per
        C value; each cell is the mean of the last 10 entries of
        ``training_manCV.secret_cm`` after the corresponding run.
        """
        C_range = np.logspace(Cmin, Cmax, num=numC, base=2, endpoint=True)
        gamma_range = np.logspace(rmin, rmax, num=numr, base=2, endpoint=True)

        svc = SVC(kernel=seed)
        # One row per gamma; a column per C is appended in the loop below.
        df_C_gamma = DataFrame({'gamma_range': gamma_range})
        count = 0
        for C in C_range:
            score_C = []
            count = count + 1
            for gamma in gamma_range:

                # Reset the class-level accumulators before every run.
                # NOTE(review): presumably the scorer appends to these lists;
                # with n_jobs=-1 confirm that appends made by worker processes
                # are visible here.
                training_manCV.secret_cm = []
                training_manCV.secret_score = []
                svc.C = C
                svc.gamma = gamma
                svc.degree = degree
                this_scores = cross_val_score(
                    svc,
                    train,
                    trainlabel,
                    scoring=training_manCV().metric_scores,
                    cv=10,
                    n_jobs=-1)

                df_raw0 = DataFrame({'cm': training_manCV.secret_cm})

                # The stored score comes from secret_cm, not from this_scores.
                score_C.append(np.mean(df_raw0['cm'].tail(10)))

            # this_scores still refers to the LAST gamma of this C, so the
            # printed mean covers that single run only.
            print(np.mean(this_scores))
            print("%r cycle finished, %r left" % (count, numC - count))
            df_C_gamma[C] = score_C

        return df_C_gamma

示例#9

0

显示文件

文件： MySVM.py 项目： falcong/Machine_learning_for_reliability_analysis

    def trainauc(self,
                 train,
                 trainlabel,
                 seed,
                 Cmin,
                 Cmax,
                 numC,
                 rmin,
                 rmax,
                 numr,
                 degree=3,
                 method='roc_auc',
                 rad_stat=2):
        """Score every (C, gamma) pair of a log-spaced grid with 10-fold CV.

        The C axis has ``numC`` points from 2**Cmin to 2**Cmax and the gamma
        axis ``numr`` points from 2**rmin to 2**rmax (both base-2 log-spaced,
        endpoints included).  ``seed`` selects the SVC kernel, ``method`` is
        the scoring passed to ``cross_val_score`` and ``rad_stat`` is set as
        the estimator's ``random_state``.

        Returns a DataFrame whose ``gamma_range`` column lists the gammas and
        whose remaining columns, keyed by C, hold the mean CV scores.
        """
        c_grid = np.logspace(Cmin, Cmax, num=numC, base=2, endpoint=True)
        gamma_grid = np.logspace(rmin, rmax, num=numr, base=2, endpoint=True)

        estimator = SVC(kernel=seed)
        table = DataFrame({'gamma_range': gamma_grid})

        cycles_done = 0
        for penalty in c_grid:
            cycles_done += 1
            means_for_penalty = []

            for width in gamma_grid:
                estimator.C = penalty
                estimator.gamma = width
                estimator.degree = degree
                estimator.random_state = rad_stat
                cv_scores = cross_val_score(estimator,
                                            train,
                                            trainlabel,
                                            scoring=method,
                                            cv=10,
                                            n_jobs=-1)
                means_for_penalty.append(np.mean(cv_scores))

            # Progress output: average score across all gammas for this C.
            print(np.mean(means_for_penalty))
            print("%r cycle finished, %r left" %
                  (cycles_done, numC - cycles_done))
            table[penalty] = means_for_penalty

        return table

示例#10

0

显示文件

文件： savable.py 项目： zeetaros/KickStart

    def to_super(self):
        """Return a plain ``SVC`` that carries this object's attributes.

        A fresh SVC is created (with ``kernel="linear"`` when this object uses
        the linear kernel, default-constructed otherwise) and every attribute
        named in the table below is copied onto it by reference.
        """
        if self.kernel == "linear":
            plain = SVC(kernel="linear")
        else:
            plain = SVC()

        # Attributes are copied in this exact order.
        attribute_names = (
            "C",
            "_dual_coef_",
            "_gamma",
            "_impl",
            "_intercept_",
            "_sparse",
            "cache_size",
            "class_weight",
            "class_weight_",
            "classes_",
            "coef0",
            "decision_function_shape",
            "degree",
            "dual_coef_",
            "epsilon",
            "fit_status_",
            "gamma",
            "intercept_",
            "kernel",
            "max_iter",
            "n_support_",
            "nu",
            "probA_",
            "probB_",
            "probability",
            "random_state",
            "shape_fit_",
            "shrinking",
            "support_",
            "support_vectors_",
            "tol",
            "verbose",
        )
        for attribute in attribute_names:
            setattr(plain, attribute, getattr(self, attribute))
        return plain

示例#11

0

显示文件

文件： construct_classifier.py 项目： zy20030535/Workflow-for-Optimal-Radiomics-Classification

def construct_SVM(config, regression=False):
    """Construct a SVM classifier (or regressor) from the config settings.

    Args:
        config (dict): Dictionary of the required config settings.
            ``config['classifiers']`` is always read.  Unless it equals
            ``'RankedSVM'``, the keys ``max_iter``, ``random_seed``,
            ``SVMKernel``, ``SVMC``, ``SVMdegree``, ``SVMcoef0`` and
            ``SVMgamma`` are read as well.
        regression (bool): When true an ``SVMR`` is built instead of an
            ``SVC``.

    Returns:
        The estimator: a default-constructed ``RankedSVM`` when
        ``config['classifiers'] == 'RankedSVM'``, otherwise the configured
        ``SVC`` (balanced class weights, probability estimates enabled) or
        ``SVMR``.

    """
    # A ranked SVM replaces the regular estimator entirely, so return it
    # before building an SVC/SVR that would only be thrown away.
    if config['classifiers'] == 'RankedSVM':
        return RankedSVM()

    max_iter = config['max_iter']
    if not regression:
        clf = SVC(class_weight='balanced',
                  probability=True,
                  max_iter=max_iter,
                  random_state=config['random_seed'])
    else:
        clf = SVMR(max_iter=max_iter, random_state=config['random_seed'])

    clf.kernel = str(config['SVMKernel'])
    clf.C = config['SVMC']
    clf.degree = config['SVMdegree']
    clf.coef0 = config['SVMcoef0']
    clf.gamma = config['SVMgamma']

    return clf

示例#12

0

显示文件

文件： MySVM.py 项目： jp1989326/Machine_learning_for_reliability_analysis

    def trainSVC (self, train, trainlabel, seed, Cmin, Cmax, numC, rmin, rmax, numr, degree=3):
        """Grid-search C and gamma for an SVC with a custom CV scorer.

        C takes ``numC`` base-2 log-spaced values between 2**Cmin and 2**Cmax,
        gamma takes ``numr`` values between 2**rmin and 2**rmax.  ``seed`` is
        passed to SVC as its ``kernel`` argument.  Each (C, gamma) pair is
        evaluated with 10-fold ``cross_val_score`` using
        ``training_manCV().metric_scores`` as the scorer.

        Returns a DataFrame with a ``gamma_range`` column plus one column per
        C value; each cell is the mean of the last 10 entries of
        ``training_manCV.secret_cm`` after the corresponding run.
        """
        C_range=np.logspace(Cmin, Cmax, num=numC, base=2,endpoint= True)
        gamma_range=np.logspace(rmin, rmax, num=numr, base=2,endpoint= True)

        svc = SVC(kernel=seed)
        # One row per gamma; a column per C is appended in the loop below.
        df_C_gamma= DataFrame({'gamma_range':gamma_range})
        count = 0
        for C in C_range:
            score_C=[]
            count=count+1
            for gamma in gamma_range:

                # Reset the class-level accumulators before every run.
                # NOTE(review): presumably the scorer appends to these lists;
                # with n_jobs=-1 confirm that appends made by worker processes
                # are visible here.
                training_manCV.secret_cm=[]
                training_manCV.secret_score=[]
                svc.C = C
                svc.gamma = gamma
                svc.degree = degree
                this_scores = cross_val_score(svc, train, trainlabel, scoring=training_manCV().metric_scores, cv=10, n_jobs=-1)

                df_raw0 = DataFrame({'cm':training_manCV.secret_cm})

                # The stored score comes from secret_cm, not from this_scores.
                score_C.append(np.mean(df_raw0['cm'].tail(10)))

            # this_scores still refers to the LAST gamma of this C, so the
            # printed mean covers that single run only.
            print (np.mean(this_scores) )
            print ("%r cycle finished, %r left" %(count, numC-count))
            df_C_gamma[C]= score_C

        return df_C_gamma

示例#13

0

显示文件

文件： iaml01cw2_q2.py 项目： alexchelba/IAML

def iaml01cw2_q2_5():
    """Tune C of an RBF SVC by 3-fold CV on a class-capped training subset.

    A subset of ``Xtrn_nm`` / ``Ytrn`` is built that keeps at most the first
    1000 samples of each label (labels index a length-10 counter).  Ten
    log-spaced values of C between 1e-2 and 1e3 are then scored with 3-fold
    cross-validated accuracy, the mean accuracies are plotted against C on a
    logarithmic x-axis and the best result is printed.

    Returns the C value with the highest mean accuracy (0.0 if no mean
    accuracy exceeds zero).
    """
    Xsmall = []
    Ysmall = []
    counter = np.zeros(10)
    for sample, label in zip(Xtrn_nm, Ytrn):
        if counter[label] < 1000:
            Xsmall.append(sample)
            Ysmall.append(label)
            counter[label] += 1
    Xsmall = np.array(Xsmall)
    Ysmall = np.array(Ysmall)

    Cs = np.logspace(-2, 3, 10)
    svm = SVC(kernel='rbf', gamma='auto')
    means = []
    maxx = 0.
    Cmax = 0.
    for C in Cs:
        svm.C = C
        this_scores = cross_val_score(svm,
                                      Xsmall,
                                      Ysmall,
                                      cv=3,
                                      scoring='accuracy')
        m = this_scores.mean()
        means.append(m)
        # Strict comparison: on ties the smaller C is kept.
        if m > maxx:
            maxx = m
            Cmax = C

    plt.plot(Cs, means)
    plt.xlabel('(log-scale) Regularisation parameter')
    plt.ylabel('Mean accuracies')
    # Rescale the axes that already hold the curve.  ``plt.axes()`` can create
    # a brand-new Axes on top of the figure instead of returning the current
    # one, which leaves the plotted curve on a linear axis.
    plt.xscale('log')
    plt.title('Plot of mean accuracy for 10 evenly log-spaced values of C')
    print('Max mean accuracy = {} for C = {}'.format(round(maxx, 5),
                                                     round(Cmax, 4)))
    plt.show()
    return Cmax

示例#14

0

显示文件

# Bag-of-words features for both splits, plus an L2-normalised copy of each.
train_features = bag.get_features(train_sentences)
test_features = bag.get_features(test_sentences)
scaled_train_sentences, scaled_test_sentences = normalize_data(
    train_features, test_features, type='l2')

# Sanity check: show the first normalised row of each split.
print(scaled_train_sentences[:1])
print(scaled_test_sentences[:1])

# ### 6. Train a SVM with linear kernel that classifies spam/non-spam messages. Use parameter C of value 1.
#
# Calculate the `accuracy` and `F1-score` for the testing data.

# Model for the non-scaled data.
svm_model = SVC(C=1, kernel='linear')
svm_model.fit(train_features, train_labels)

# Evaluate on the non-scaled test features.
predictions = svm_model.predict(test_features)
print(accuracy_score(test_labels, predictions))
print(f1_score(test_labels, predictions))

# Fresh, identically configured model for the scaled data.
svm_model = SVC(C=1, kernel='linear')

# train - scaled data

示例#15

0

显示文件

文件： rbm.py 项目： zouwen198317/CNN-detection-tracking

     joblib.dump(gs, "grid_cv_rbm.pkl", compress=3)
     
 else:
     # 直接设置参数训练
     bow = BoWFeature()
     bow.patch_num=10000
     bow.patch_size=(20,20)
     bow.learning_rate=0.001
     bow.n_components=512
     bow.n_iter=100
     bow.sample_num = 1000
     
     bow.fit(x_train)
     
     svm = SVC(kernel='linear', probability = True, random_state=42)
     svm.C = 1000
     #lr = LogisticRegression()
     #lr.C = 100
     best = Pipeline([('bow', bow),('svm',svm)])
     best.fit(x_train, y_train)
     
     print "*********************Save*******************************"
     joblib.dump(best, "classifier_rbm.pkl", compress=3)
             
 print "*********************Test*******************************"
 y_test_pre = best.predict(x_test)
 cm = confusion_matrix(y_test, y_test_pre)
 from map_confusion import plot_conf
 plot_conf(cm, range(le.classes_.size), 'RSDataset.png')
 
 from sklearn.metrics import classification_report

示例#16

0

显示文件

# Candidate C / gamma pairs that were tested at some point (kept for reference).
# NOTE(review): `values` is iterated below but only this commented-out
# definition is visible here — confirm where the active definition lives.
#values = [[0.001, 0.1],
#          [0.01, 0.2],
#          [1, 0.5],
#          [10, 2],
#          [100, 0.02],
#          [100, 1]]
#
# Per-candidate accumulators for the cross-validation accuracy (mean and std).
xval_acc_mean = np.zeros((len(values, )))
xval_acc_std = np.zeros((len(values, )))
#
# Cross-validation: one pass over the splitter per candidate value.
for i, value_C in enumerate(values):
    # Set the C value on the shared classifier.
    clf.C = value_C
    # One accuracy slot per split.
    xval_acc = np.zeros((splitter.get_n_splits()))
    # NOTE(review): k is not advanced in the lines visible here — confirm it
    # is incremented at the end of the inner loop.
    k = 0
    # Split data and labels into train and test parts.
    for tr_idx, ts_idx in splitter.split(x_tr):
        x_tr_xval = x_tr[tr_idx, :]
        y_tr_xval = y_tr[tr_idx]
        x_ts_xval = x_tr[ts_idx, :]
        y_ts_xval = y_tr[ts_idx]

        # Train a model on the training part of the split.
        clf.fit(x_tr_xval, y_tr_xval)
        # Accuracy of the trained model on the held-out part.
        yc = clf.predict(x_ts_xval)
        xval_acc[k] = np.mean(yc == y_ts_xval)

示例#17

0

显示文件

# Build the One-Class SVM.
# Note: the `nu` argument of OneClassSVM corresponds to the 'v' parameter.
clf = svm.OneClassSVM(kernel='rbf', max_iter=1000000, cache_size=200, nu=0.2)

# Train the One-Class SVM on X_train_filtered (no labels are passed).
clf.fit(X_train_filtered)
print("OneClass-SVM trained .... ")
# Predict on X_test and score against the outlier labels.
y_predicted_OneClass = clf.predict(X_test)
print("OneClass-SVM tested  .... ")
F1, R, P = clf_performance(y_outliers_test, y_predicted_OneClass, on_loop=0)

###############################################################################
# Outlier SVM Classification
###############################################################################

# Initialize a classic (two-class) SVM with an RBF kernel.
clc = SVC(kernel='rbf')
# Use this only for tuning C of the SVM (from the experiments the best value of C is 100).
#C_tuned = CTune_SVM(clc, csr_matrix(X_train_bin), y_outliers_train, n_folds = 5, inf_pow = -4,  sup_pow = 4)
clc.C = 100
print("The selected value of C for the OutliersSVM is:  ", str(clc.C))
# Train the SVM with X_train and the outlier labels.
clc.fit(X_train, y_outliers_train)
print("Outliers-SVM trained .... ")
# Predict the labels of X_test.
y_predicted_outliers = clc.predict(X_test)
print("Outliers-SVM test .... ")
# F1, R and P are rebound here, replacing the One-Class SVM results above.
F1, R, P = clf_performance(y_outliers_test, y_predicted_outliers, on_loop=0)

示例#18

0

显示文件

def _csv_row(*fields):
    """Join the fields into one comma-separated line terminated by a newline."""
    return ','.join(str(field) for field in fields) + '\n'


def loop_test(document_topic, X_text):
    """Benchmark One-Class SVM and outlier SVC over a grid and log CSV rows.

    For every combination of feature count (10, 25, 50, 100), document
    representation (binary, Nfrequency, tf-idf, hadamard) and kernel (linear,
    poly, rbf, sigmoid) a ``OneClassSVM`` and an ``SVC`` with C=100 are
    trained and evaluated with ``clf_performance``.  One row per model is
    written to ``<address><document_topic>.txt`` below a header line.

    Args:
        document_topic: Name used for the results file (without extension).
        X_text: Raw documents handed to ``CountVectorizer.fit_transform``.
    """
    address = '/home/juan-laptop/Dropbox/AIRO/Neural Networks/Results/'

    n_features = np.array([10, 25, 50, 100])
    representations = np.array(['binary', 'Nfrequency', 'tf-idf', 'hadamard'])
    kernel = np.array(['linear', 'poly', 'rbf', 'sigmoid'])

    # The context manager closes the results file even if a fit or a
    # prediction raises half-way through the sweep.
    with open(address + document_topic + '.txt', 'w') as textfile:
        textfile.write(
            'SVM,n_features,representation,kernel,F1,P,R,Acc,FPR,TP,TN,FP,FN\n')

        for i in n_features:

            print('Evualating n_features : ' + str(i))
            vectorizer = CountVectorizer(min_df=1,
                                         analyzer='word',
                                         stop_words='english')
            X_text_vec = vectorizer.fit_transform(X_text)
            # Transform a sparse matrix into a full matrix
            X_text_vec = X_text_vec.todense()
            X = reduce_features(X_text_vec, n_features=i)

            # NOTE(review): X is reassigned by each representation, so every
            # branch after the first works on the previous branch's output
            # rather than on the reduced counts — confirm this is intended.
            for j in representations:
                print('Document representation : ' + j)
                if j == 'binary':
                    binarize_set(X)
                elif j == 'Nfrequency':
                    # use_idf must be a real boolean: the string 'False' is
                    # truthy, so idf weighting was never switched off here.
                    transformer = TfidfTransformer(use_idf=False)
                    X = transformer.fit_transform(X)
                    X = X.todense()
                elif j == 'tf-idf':
                    transformer = TfidfTransformer(use_idf=True)
                    X = transformer.fit_transform(X)
                    X = X.todense()
                elif j == 'hadamard':
                    X = hadamard_product(X)

                X_train, X_train_filtered, X_test, y_outliers_train, y_outliers_test = dataset_split(
                    X, 5, on_loop=1)

                for k in kernel:
                    print('Kernel : ' + k)

                    # One-class model: fitted on the filtered training set only.
                    clf = svm.OneClassSVM(kernel=str(k),
                                          max_iter=1000000,
                                          cache_size=200,
                                          nu=0.2)
                    clf.fit(X_train_filtered)
                    y_predicted_OneClass = clf.predict(X_test)
                    F1, P, R, Acc, FPR, TP, TN, FP, FN = clf_performance(
                        y_outliers_test, y_predicted_OneClass, on_loop=1)
                    textfile.write(
                        _csv_row('OneClass', i, j, k, F1, P, R, Acc, FPR, TP,
                                 TN, FP, FN))

                    # Supervised model: fitted on the labelled training set.
                    clc = SVC(kernel=str(k))
                    clc.C = 100
                    clc.fit(X_train, y_outliers_train)
                    y_predicted_outliers = clc.predict(X_test)
                    F1, P, R, Acc, FPR, TP, TN, FP, FN = clf_performance(
                        y_outliers_test, y_predicted_outliers, on_loop=1)
                    textfile.write(
                        _csv_row('Outliers', i, j, k, F1, P, R, Acc, FPR, TP,
                                 TN, FP, FN))

    print('finish!!')

示例#19

0

显示文件

        '''return accuracy value'''
        return correct_predictions / len(sample_vector)


if __name__ == '__main__':
    # data_formatter.save_files_as_numpy("rt-polarity.pos.txt", "rt-polarity.neg.txt")
    nlp = NLP()

    # Grid used by the (currently disabled) grid search below.
    thresh_val_range = [[5, 2000], [25, 1000], [50, 500]]
    c_val_range = [0.0001, 1, 1000]
    # nlp.train_on_grid_search(thresh_val_range, c_val_range)

    # Predict results on the test set with the settings stored on `nlp`.
    bag_of_words = nlp.generate_bag_of_words(
        nlp.vocabulary, nlp.vocab_lower_thresh, nlp.vocab_upper_thresh)
    training_vector = data_formatter.vectorize(nlp.x_train, bag_of_words)
    testing_vector = data_formatter.vectorize(nlp.x_test, bag_of_words)

    svm = SVC(kernel='linear', random_state=1, C=nlp.c_val)
    print("Training svm...")
    svm.fit(training_vector, nlp.y_train)
    print("Predicting model...")
    test_predictions = svm.predict(testing_vector)

    # Count the positions where prediction and true label agree.
    correct_predictions = sum(
        1 for i in range(len(test_predictions))
        if test_predictions[i] == nlp.y_test[i])

    accuracy_percent = correct_predictions / len(test_predictions) * 100
    print("Final Accuracy: %" + str(accuracy_percent))

示例#20

0

显示文件

文件： svm_text_analyze.py 项目： abonec/python_machine_learning

# Turn the newsgroup texts into TF-IDF features; the targets are the labels.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(newsgroup.data)
y = newsgroup.target

# Candidate values for C: powers of ten from 1e-5 to 1e5.
C = np.power(10.0, np.arange(-5, 6))
grid = {'C': C}
# NOTE(review): KFold(n, n_folds=...) and grid_scores_ below look like the
# legacy scikit-learn cross-validation API — confirm the pinned version
# before upgrading.
k_folder = KFold(X.shape[0], n_folds=5, shuffle=True, random_state=241)
clf = SVC(kernel='linear', random_state=241)
grid_search = GridSearchCV(clf, grid, scoring='accuracy', cv=k_folder)
grid_search.fit(X, y)

# optimal_parameters is not used in the lines visible here.
optimal_parameters = {}
# Best mean validation score, then the first C that reached it.
max_score = max(x.mean_validation_score for x in grid_search.grid_scores_)
optimal_c = next(x.parameters['C'] for x in grid_search.grid_scores_ if x.mean_validation_score == max_score)

# Refit the classifier on the full data with the selected C.
clf.C = optimal_c
clf.fit(X, y)

# Pair every stored coefficient with its word and rank by absolute weight.
# coef_ is accessed through .indices / .data, i.e. it is expected to be a
# sparse matrix here.
feature_mappings = vectorizer.get_feature_names()
result = {
    'words': list(feature_mappings[i] for i in clf.coef_.indices),
    'values': list(abs(weight) for weight in clf.coef_.data),
}
coef = DataFrame(data=result)
coef = coef.sort_values(by='values', ascending=False)

# The ten highest-weighted words.
words = coef.head(10)['words'].values.tolist()

# Written out alphabetically sorted and space separated.
output = " ".join(sorted(words))
coursera.output("svm_text_analyze.txt", output)

示例#21

0

显示文件

文件： svm_sklearn.py 项目： yxdongshine/depthLearning

# 4.1 GridSearchCV
# Hyper-parameter grid.
# NOTE(review): according to the scikit-learn docs `degree` is only used by
# the polynomial kernel, so sweeping it with kernel='linear' presumably
# repeats the same fit four times — confirm the intent.
tuned_parameters = [{
    'kernel': ['rbf'],
    'gamma': [1e-3, 1e-4]
}, {
    'kernel': ['linear'],
    'degree': [3, 5, 7, 9]
}]
clf = SVC()
# 100 log-spaced values of C from 1e1 to 1e10.  The former fourth positional
# argument (1000) landed on `endpoint` and only acted as a truthy flag, so
# dropping it leaves the generated values unchanged.
C_s = np.logspace(1, 10, 100)
scores = list()
scores_std = list()
for C in C_s:
    # Cross-validation of the plain estimator for this C.
    clf.C = C
    this_scores = cross_val_score(clf, X_stand, Y, cv=5)
    scores.append(np.mean(this_scores))
    scores_std.append(np.std(this_scores))

    # GridSearchCV selects the remaining hyper-parameters for this C.  C is
    # handed to the base estimator: the previous `clfg.c = C` assignment set
    # an attribute GridSearchCV never reads, so every search ran with the
    # default C.
    clfg = GridSearchCV(SVC(C=C), tuned_parameters, cv=5)
    clfg.fit(X_stand, Y)
    print("Best parameters set found on development set:")
    print()
    print(clfg.best_params_)
    print()
    print("Grid scores on development set:")
    print()
    means = clfg.cv_results_['mean_test_score']

示例#22

0

显示文件

	poly.degree=i
	for j in coef_list : 
		print k
		poly.coef0=j
		score1=cross_val_score(poly,Xtrain,ytrain,cv=5)
		score.append(np.mean(score1))
		if k==0 : 
			i1=i
			j1=j
		if k>0 and np.mean(score1)>score[k-1]  : 
			i1=i
			j1=j
		print(i,j,np.mean(score1))
		k=k+1'''
# Sweep the penalty C of the `poly` estimator, recording the mean 5-fold
# cross-validation score of each candidate in `score`.
for i in c_list:
    poly.C = i
    score1 = cross_val_score(poly, Xtrain, ytrain, cv=5)
    score.append(np.mean(score1))
    # While k is 0 the current C is always taken as the selected value.
    if k == 0:
        i1 = i

    # NOTE(review): k is not incremented inside this loop, and the comparison
    # is against score[k - 1] rather than the best score so far — confirm
    # that i1 really ends up holding the best C.
    if k > 0 and np.mean(score1) > score[k - 1]:
        i1 = i
    print(k, i, ':', np.mean(score1))
'''0
(2, 0, 0.054740548179391016)
1
(2, 1, 0.1493362573941846)
2
(2, 2, 0.18009567595229553)
3