Пример #1
0
def select_feature(trainfilename, testfilename):
    """Write 20 feature subsets of a train/test svmlight pair, ranked by ttest.

    Both files are loaded together, ``ttest`` is called once on the training
    split, and its result is handed to ``SelectKBest`` as the score for every
    subset size. For ``i`` in 1..20 the subset size is
    ``(n_features // 20) * i``; the reduced train and test matrices are
    dumped to ``<trainfilename>_<size>`` and ``<testfilename>_<size>``.

    Args:
        trainfilename: Path of the training file in svmlight format.
        testfilename: Path of the test file in svmlight format.

    Returns:
        None. The results are the files written next to the inputs.
    """
    X_train, y_train, X_test, y_test = load_svmlight_files(
        (trainfilename, testfilename), multilabel=True)

    featureNum = X_train.shape[1]

    # The score only depends on the training data, so compute it once and
    # let every selector reuse it through the closure below.
    tvalue = ttest(X_train, y_train)

    def returnTtest(X, y):
        # SelectKBest calls this with the data it is fitted on; the arguments
        # are ignored because the answer has already been computed.
        return tvalue

    # Floor division keeps the subset size an integer on both Python 2 and
    # Python 3 (plain "/" would yield a float under true division).
    # NOTE(review): with fewer than 20 features step is 0, so every pass
    # requests k=0 and writes to the same "_0" files - confirm intended.
    step = featureNum // 20
    for i in range(1, 21):
        selectNum = step * i
        # Single-argument print: same output under Python 2 and Python 3.
        print("selecting %s features" % selectNum)
        selector = SelectKBest(returnTtest, k=selectNum)
        X_train_new = selector.fit_transform(X_train, y_train)
        X_test_new = selector.transform(X_test)
        # NOTE(review): labels were loaded with multilabel=True but are
        # dumped without a multilabel argument - verify against the
        # installed dump_svmlight_file.
        dump_svmlight_file(X_train_new,
                           y_train,
                           trainfilename + '_' + str(selectNum),
                           zero_based=False)
        dump_svmlight_file(X_test_new,
                           y_test,
                           testfilename + '_' + str(selectNum),
                           zero_based=False)
Пример #2
0
def select_feature(trainfilename, testfilename):
    """Write 20 feature subsets of a train/test svmlight pair, ranked by chi2.

    Both files are loaded together, ``chi2`` is evaluated once on the
    training split, and the cached result is handed to ``SelectKBest`` for
    every subset size. For ``i`` in 1..20 the subset size is
    ``(n_features // 20) * i``; the reduced train and test matrices are
    dumped to ``<trainfilename>_<size>`` and ``<testfilename>_<size>``.

    Args:
        trainfilename: Path of the training file in svmlight format.
        testfilename: Path of the test file in svmlight format.

    Returns:
        None. The results are the files written next to the inputs.
    """
    X_train, y_train, X_test, y_test = load_svmlight_files(
        (trainfilename, testfilename), multilabel=True)

    featureNum = X_train.shape[1]

    # chi2 only depends on the training data, which is the same on every
    # pass, so evaluate it once here instead of once per subset size.
    chivalue = chi2(X_train, y_train)

    def returnCHI(X, y):
        # Score function for SelectKBest: ignores its arguments and returns
        # the precomputed chi2 result.
        return chivalue

    # Floor division keeps the subset size an integer on both Python 2 and
    # Python 3 (plain "/" would yield a float under true division).
    # NOTE(review): with fewer than 20 features step is 0, so every pass
    # requests k=0 and writes to the same "_0" files - confirm intended.
    step = featureNum // 20
    for i in range(1, 21):
        selectNum = step * i
        # Single-argument print: same output under Python 2 and Python 3.
        print("selecting %s features" % selectNum)
        # Use the cached scores; passing chi2 itself recomputed the
        # statistic on each of the 20 iterations.
        selector = SelectKBest(returnCHI, k=selectNum)
        X_train_new = selector.fit_transform(X_train, y_train)
        X_test_new = selector.transform(X_test)
        # NOTE(review): labels were loaded with multilabel=True but are
        # dumped without a multilabel argument - verify against the
        # installed dump_svmlight_file.
        dump_svmlight_file(X_train_new, y_train,
                           trainfilename + '_' + str(selectNum),
                           zero_based=False)
        dump_svmlight_file(X_test_new, y_test,
                           testfilename + '_' + str(selectNum),
                           zero_based=False)
Пример #3
0
def select_feature_multilabel(trainfilename, testfilename):
    """Write 20 feature subsets of a train/test svmlight pair, ranked by randomValues.

    Both files are loaded together, ``randomValues`` is called once on the
    training split, and its result (paired with an all-ones array in the
    p-value slot) is handed to ``SelectKBest`` for every subset size. For
    ``i`` in 1..20 the subset size is ``(n_features // 20) * i``; the reduced
    train and test matrices are dumped to ``<trainfilename>_<size>`` and
    ``<testfilename>_<size>``.

    Args:
        trainfilename: Path of the training file in svmlight format.
        testfilename: Path of the test file in svmlight format.

    Returns:
        None. The results are the files written next to the inputs.
    """
    X_train, y_train, X_test, y_test = load_svmlight_files(
        (trainfilename, testfilename), multilabel=True)

    featureNum = X_train.shape[1]

    # Scores are computed once up front and reused by every selector.
    randval = randomValues(X_train, y_train)
    # Constant array of ones returned alongside the scores. It is already
    # created with shape (featureNum, 1); the former follow-up reshape call
    # discarded its result and has been dropped.
    p = np.ones((featureNum, 1), int)

    def returnIG(X, y):
        # Score function for SelectKBest: ignores its arguments and returns
        # the precomputed (scores, ones) pair.
        return randval, p

    # Floor division keeps the subset size an integer on both Python 2 and
    # Python 3 (plain "/" would yield a float under true division).
    # NOTE(review): with fewer than 20 features step is 0, so every pass
    # requests k=0 and writes to the same "_0" files - confirm intended.
    step = featureNum // 20
    for i in range(1, 21):
        selectNum = step * i
        # Single-argument print: same output under Python 2 and Python 3.
        print("selecting %s features" % selectNum)
        selector = SelectKBest(returnIG, k=selectNum)
        X_train_new = selector.fit_transform(X_train, y_train)
        X_test_new = selector.transform(X_test)
        # NOTE(review): labels were loaded with multilabel=True but are
        # dumped without a multilabel argument - verify against the
        # installed dump_svmlight_file.
        dump_svmlight_file(X_train_new, y_train,
                           trainfilename + '_' + str(selectNum),
                           zero_based=False)
        dump_svmlight_file(X_test_new, y_test,
                           testfilename + '_' + str(selectNum),
                           zero_based=False)