Пример #1
0
# y_predicted_report_rfecv_cv=[]
# y_predicted_report_rfecv_cv,y_test_report_rfecv_cv=cl.do_cross_validation(classifier,cv,all_feature_matrix,y,index_num)
#  
# ####### Compute confusion matrix and classification report #######
# cl.print_confusion_matrix(y_test_report_rfecv_cv, y_predicted_report_rfecv_cv,"rfecv_cvloop")
# cl.print_classification_report(y_test_report_rfecv_cv, y_predicted_report_rfecv_cv,['class 0', 'class 1'])

print("############################################")

# Fold splitter built from the evaluation labels, and the estimator handed to
# the feature-selection helper below.
# NOTE(review): StratifiedKFold(y, n_folds=...) is the call signature of the
# legacy sklearn.cross_validation module; sklearn.model_selection uses
# n_splits instead -- confirm which import this script relies on.
folds = 5
cv = StratifiedKFold(y_eval, n_folds=folds)
classifier = svm.SVC(kernel='linear', probability=True)

# ---------------- Feature selection using RFECV only ----------------
only_feature_selection, index_arr_onlyfs = cl.select_optimal_features(
    X_cv_normalized_matrix,
    y_cv,
    classifier,
)
print("number of features selected only with rfecv: "
      + str(len(index_arr_onlyfs)))

# index_freq_fs_only is not read again in the visible part of this script.
index_num_fs_only, index_freq_fs_only = cl.sort_and_combine_feature_indices(
    index_arr_onlyfs)
print("index numbers are: " + str(index_num_fs_only))

# Write the selected indices out through the project's file helper.
rw.write_features_to_file(
    index_num_fs_only, output_folder, "rfecv_selected13_features.txt")

# ---------------- Features selected by RFECV alone ----------------
# Look every selected index up in inv_global_vocab, keeping the same order.
rfecv_only_feature_arr = [inv_global_vocab[val] for val in index_num_fs_only]
    
Пример #2
0
    # Per-split preparation: normalise both slices, pick features on the
    # training slice, then reduce both slices to the picked features.
    # `train` and `test` are index sets supplied by code outside this fragment.
    #
    # The train and test slices are normalised by two separate helper calls.
    # NOTE(review): if normalise_mean_var derives its statistics from the
    # argument it receives, the test slice is scaled with its own statistics
    # rather than the training ones -- confirm this is intended.
    # Naming note: the two results use different spellings
    # ("normalized" for train, "normalised" for test).
    normalized_matrix_train = cl.normalise_mean_var(all_feature_matrix[train])
    normalised_matrix_test = cl.normalise_mean_var(all_feature_matrix[test])

    # Not read anywhere in the visible lines of this fragment.
    y_predicted2 = []

    ##### Unfinished pipeline sketch kept from the original (never executed):
    # for clf, name_clf in zip(classifiers ...
    #     create a pipeline
    #     wrapper_filter = somefilter
    #     pipe_line = Pipeline([('wrapper', wrapper_selection), (name_clf, clf)])
    #     pipe_line.fit(normalized_matrix_train, y[train])
    #     pipe_line.score()
    #####

    # Select features using RFECV on the training data only. The helper
    # returns a pair; presumably the reduced matrix and the selected feature
    # indices -- verify against cl.select_optimal_features.
    only_feature_selection_matrix, index_arr_onlyfs = cl.select_optimal_features(
        normalized_matrix_train, y[train], classifier)

    # Print the inv_global_vocab entry for every selected index
    # (presumably the feature name -- confirm against where the map is built).
    # Disabled alternative:
    #   index_num, index_freq = cl.sort_and_combine_feature_indices(index_arr_onlyfs)
    for val in index_arr_onlyfs:
        # Disabled debug output: print("val is: " + str(val))
        print(inv_global_vocab[val])

    # Disabled alternative:
    #   index_num_fs_only, index_freq_fs_only = cl.sort_and_combine_feature_indices(index_arr_onlyfs)

    # Build the training matrix from the selected indices; presumably this
    # keeps only the selected columns -- confirm in cl.make_new_matrix.
    matrix_for_train = cl.make_new_matrix(index_arr_onlyfs,
                                          normalized_matrix_train)
    # Disabled: classifier.fit(matrix_for_train, y[train])

    # Same index list applied to the test slice, so both matrices are built
    # from identical feature indices.
    matrix_for_test = cl.make_new_matrix(index_arr_onlyfs,
                                         normalised_matrix_test)
    probas_ = classifier.fit(matrix_for_train,
Пример #3
0
# y_predicted_report_rfecv_cv=[]
# y_predicted_report_rfecv_cv,y_test_report_rfecv_cv=cl.do_cross_validation(classifier,cv,all_feature_matrix,y,index_num)
#
# ####### Compute confusion matrix and classification report #######
# cl.print_confusion_matrix(y_test_report_rfecv_cv, y_predicted_report_rfecv_cv,"rfecv_cvloop")
# cl.print_classification_report(y_test_report_rfecv_cv, y_predicted_report_rfecv_cv,['class 0', 'class 1'])

print("############################################")

# Stratified folds over y_eval plus a linear-kernel SVC with probability=True.
# NOTE(review): passing the labels and n_folds to StratifiedKFold matches the
# legacy sklearn.cross_validation signature -- confirm the import in use.
folds = 5
cv = StratifiedKFold(y_eval, n_folds=folds)
classifier = svm.SVC(probability=True, kernel='linear')

# ---- Feature selection using RFECV only ----
only_feature_selection, index_arr_onlyfs = cl.select_optimal_features(
    X_cv_normalized_matrix, y_cv, classifier
)
print(
    "number of features selected only with rfecv: "
    + str(len(index_arr_onlyfs))
)
(index_num_fs_only,
 index_freq_fs_only) = cl.sort_and_combine_feature_indices(index_arr_onlyfs)
print("index numbers are: " + str(index_num_fs_only))

# Hand the selected indices to the project's writer helper.
rw.write_features_to_file(
    index_num_fs_only,
    output_folder,
    "rfecv_selected13_features.txt",
)

# ---- Features selected by RFECV alone ----
# Starts empty; nothing in this section adds to it.
rfecv_only_feature_arr = list()
for val in index_num_fs_only:
 # Disabled debug output: print val / print(inv_global_vocab[val])
 #
 # NOTE(review): everything below is indented as the body of this loop, so it
 # runs once per entry of index_num_fs_only. The body reads `train`, `test`
 # and `normalized_matrix_train`, none of which is assigned in the visible
 # part of this script section, and it never appends to
 # rfecv_only_feature_arr. This looks like two unrelated fragments spliced
 # together -- confirm against the original script before relying on it.
 normalised_matrix_test=cl.normalise_mean_var(all_feature_matrix[test])

 # Not read anywhere in the visible lines.
 y_predicted2=[]

 ##### Unfinished pipeline sketch kept from the original (never executed):
 # for clf, name_clf in zip(classifiers ...
 #     create a pipeline
 #     wrapper_filter = somefilter
 #     pipe_line = Pipeline([('wrapper', wrapper_selection), (name_clf, clf)])
 #     pipe_line.fit(normalized_matrix_train, y[train])
 #     pipe_line.score()
 #####

 # Select features using RFECV on the training data only. The helper returns
 # a pair; presumably the reduced matrix and the selected feature indices --
 # verify against cl.select_optimal_features.
 only_feature_selection_matrix,index_arr_onlyfs=cl.select_optimal_features(normalized_matrix_train,y[train],classifier)

 # Print the inv_global_vocab entry for every selected index.
 # NOTE(review): this inner loop reuses the name `val`, rebinding the outer
 # loop's variable while the body runs.
 # Disabled alternative:
 #   index_num, index_freq = cl.sort_and_combine_feature_indices(index_arr_onlyfs)
 for val in index_arr_onlyfs:
     # Disabled debug output: print("val is: " + str(val))
     print (inv_global_vocab[val])

 # Disabled alternative:
 #   index_num_fs_only, index_freq_fs_only = cl.sort_and_combine_feature_indices(index_arr_onlyfs)

 # Build the training matrix from the selected indices; presumably this keeps
 # only the selected columns -- confirm in cl.make_new_matrix.
 matrix_for_train=cl.make_new_matrix(index_arr_onlyfs,normalized_matrix_train)
 # Disabled: classifier.fit(matrix_for_train, y[train])

 # Same index list applied to the test slice.
 matrix_for_test=cl.make_new_matrix(index_arr_onlyfs,normalised_matrix_test)