示例#1
0
def plot_feature_importances_this(X_train, Y_repeater, Y_numRepeats, Y_quantiles, num_features):
    """
    Run plot_feature_importances once per target and collect the results.

    For each of the three targets, X_train is converted with np.array and
    passed together with the target, X_train.columns as labels, the
    matching entry of num_features as numTopFeatures, and a fixed title.
    Only element [1] of each call's return value is kept.

    @param X_train: training features; must expose a `.columns` attribute
                    (presumably a pandas DataFrame -- confirm with callers)
    @param Y_repeater: target plotted under the title 'Repeater'
    @param Y_numRepeats: target plotted under the title 'Number of Repeats'
    @param Y_quantiles: target plotted under the title
                        'Quantiles of Number of Repeats'
    @param num_features: number of features for [repeater, numrepeats, quantiles]
    @return: 3-tuple (fields_repeater, fields_numRepeats, fields_quantiles),
             each being element [1] of the corresponding
             plot_feature_importances result
    """
    # Order matters: it fixes both the plotting order and the result order.
    plot_specs = (
        (Y_repeater, 'Repeater'),
        (Y_numRepeats, 'Number of Repeats'),
        (Y_quantiles, 'Quantiles of Number of Repeats'),
    )

    selected_fields = []
    for position, (target, plot_title) in enumerate(plot_specs):
        # A fresh array is built for every call, exactly as before, so the
        # calls never share a feature matrix.
        outcome = plot_feature_importances(
            np.array(X_train),
            target,
            labels=X_train.columns,
            numTopFeatures=num_features[position],
            title=plot_title,
        )
        selected_fields.append(outcome[1])

    return tuple(selected_fields)
示例#2
0
# Script entry point: loads the training data, plots feature importances for
# the loaded fields, then builds two estimators and two field-index lists.
# NOTE(review): the statements that consume those objects are not visible in
# this portion of the file -- confirm against the remainder of the script.
if __name__ == '__main__':



    # Load the training CSV with median imputation enabled, restricted to
    # three fields. Six values are unpacked; the third is discarded.
    # NOTE(review): the data paths are absolute and machine-specific.
    x_train, y_train, _, columns_train, weights, y_class = \
        process_data('/home/jj/code/Kaggle/Fire/Data/train.csv',
                     impute=True, imputeDataDir='/home/jj/code/Kaggle/Fire/intermediateOutput', imputeStrategy='median',
                     fieldsToUse=['var11', 'var8', 'var13'])
                     # fieldsToUse=FIELDS_CLASS_GBC_TOP100[:30])
                     # fieldsToUse=FIELDS_CDF_CORR_TOP99[:19])
    # y_cdfs = np.array(pandas.read_csv('/home/jj/code/Kaggle/Fire/Data/y_pcdfs.csv')).reshape(NUM_TRAIN_SAMPLES,)[:len(y_train)]  # in case smallTrain is used
    # clf = GradientBoostingRegressor(loss='quantile', learning_rate=0.02, n_estimators=100, subsample=0.9)
    # clf = LogisticRegression()

    # Plot importances for the loaded features; numTopFeatures=3 matches the
    # three fields requested above. The return value is not used here.
    plot_feature_importances(x_train, np.array(y_train), columns_train, numTopFeatures=3, numEstimators=50)

    # NOTE(review): if SVR is sklearn.svm.SVR it is a regressor, although it
    # is bound to the name `classifier` -- confirm this is intended.
    classifier = SVR(kernel='rbf')
    regressor = Ridge(alpha=1)
    # NOTE(review): up to 20 field names are looked up in columns_train, but
    # only three fields were requested from process_data above -- verify that
    # fieldNamesToInd copes with names missing from columns_train.
    classFields = fieldNamesToInd(columns_train, FIELDS_CLASS_GBC_TOP100[:20])
    regFields = fieldNamesToInd(columns_train, FIELDS_CORR_ORDERED_TOP99[:20])

    # clf = GroupThenRegress(list(columns_train).index('var8'),
    #                        Ridge(alpha=1, normalize=False),
    #                        verbose=1)

    # clf = SVR()

    # ================== CORRELATION ==================
    # print '================== CORRELATION =================='
    # print x_train.shape