示例#1
0
def performPrediction(allFeatures, allLabels, featureNames):
    '''
    first do PCA
    '''
    selected_features = None  ## initialization
    pcaObj = decomposition.PCA(n_components=pca_comp)
    pcaObj.fit(allFeatures)
    # variance of features
    variance_of_features = pcaObj.explained_variance_
    # how much variance is explained each component
    variance_ratio_of_features = pcaObj.explained_variance_ratio_
    totalvarExplained = float(0)
    for index_ in xrange(len(variance_ratio_of_features)):
        var_exp_ = variance_ratio_of_features[index_]
        totalvarExplained = totalvarExplained + var_exp_
        print "Prin. comp#{}, ( indi) explained variance:{}, total explained variance:{}".format(
            index_ + 1, var_exp_, totalvarExplained)

    no_features_to_use = for_feature_selection
    print "Of all the features, we will use:", no_features_to_use
    print "-" * 50
    pcaObj.n_components = no_features_to_use
    selected_features = pcaObj.fit_transform(allFeatures)
    print "Selected feature dataset size:", np.shape(selected_features)
    print "-" * 50
    printPCAInsights(pcaObj, topComponentCount, featureNames)
    print "-" * 50
    '''
    lets start prediction , now that we ahve feature selection otu of the way
    '''
    sklearn_models.performModeling(selected_features, allLabels, 10)
示例#2
0
    all_features, all_labels)
# Script section: report the selected features, build the reduced feature
# data set, and run the classifiers on it.
# `selected_indices_for_features`, `all_features`, `all_labels` and
# `glimpseIndex` are not assigned in this section; they must already be
# defined by the code above.

### Alternative: use randomized logistic regression to get the features.
### It performs worse than the l1-penalized approach, so it is not used.
# selected_indices_for_features = sklearn_models.getElgiibleFeatures(all_features, all_labels)
print "Total selected feature count:", len(selected_indices_for_features)
print "The selected feature names: ", Utility.printFeatureName(
    selected_indices_for_features, True)  ## True enables steroid headers
print "-" * 50
### Select the features based on the feature indices.
selected_features = Utility.createSelectedFeatures(
    all_features, selected_indices_for_features)
print "Selected feature dataset size:", np.shape(selected_features)
# Show one row of the selected features as a sanity check; the message calls
# it the 11th entry (presumably glimpseIndex == 10 -- confirm where it is set).
print "Glimpse at  selected features (11th entry in label list): \n", selected_features[
    glimpseIndex]
print "-" * 50
# Third argument of the modeling calls below.
# NOTE(review): presumably the number of cross-validation folds -- confirm
# against sklearn_models.performModeling.
fold2Use = 10
'''
Single iteration zone : turn off 'performIterativeModeling()'
while running this 
'''
# This call runs the classifiers once.
sklearn_models.performModeling(selected_features, all_labels, fold2Use)
print "-" * 50
'''
Multiple iteration zone : turn off 'performModeling()'
while running this 
'''
# The disabled call below runs the classifiers 'iteration' number of times.
# iteration=1000
# sklearn_models.performIterativeModeling(selected_features, all_labels, fold2Use, iteration)
# print "-"*50
print "Ended at:", Utility.giveTimeStamp()