def _num_feature_extraction(self, num_feats_b, b_df, b_agg_df, p_id):
    """Aggregate each numeric feature of ``b_df`` at the ``p_id`` level.

    For every feature, ``f.get_aggregate_features_num`` adds aggregate
    columns (at least ``<feature>_mean``, ``<feature>_median`` and
    ``<feature>_std``) to ``b_agg_df``, indexed by ``p_id``.

    Parameters
    ----------
    num_feats_b : iterable of str
        Numeric column names of ``b_df`` to aggregate.
    b_df : pd.DataFrame
        Raw records, potentially multiple rows per ``p_id``.
    b_agg_df : pd.DataFrame
        Accumulator frame for the aggregated features (may start empty).
    p_id : str
        Grouping-key column name (e.g. ``'SK_ID_CURR'``).

    Returns
    -------
    pd.DataFrame
        ``b_agg_df`` with ``p_id`` inserted as the first column and a
        fresh 0..n-1 RangeIndex.
    """
    for feature in num_feats_b:
        b_agg_df = f.get_aggregate_features_num(b_df, b_agg_df, feature, p_id)
        # Single-record groups get NaN std from pandas; in that case
        # mean == median, so pin the std to 0 instead of leaving it missing.
        std_col = feature + '_std'
        b_agg_df[std_col] = np.where(
            b_agg_df[std_col].isna()
            & (b_agg_df[feature + '_mean'] == b_agg_df[feature + '_median']),
            0,
            b_agg_df[std_col])
    # Promote the group index to a leading column, then renumber the rows.
    b_agg_df.insert(0, p_id, b_agg_df.index)
    b_agg_df.reset_index(drop=True, inplace=True)
    return b_agg_df
# Features are aggregated at the SK_ID_CURR level so the bureau data lines
# up with the loan application at client level. Correcting individual
# columns before aggregation could lose information, so we aggregate first
# and apply data correction (missing values, outliers, etc.) per feature
# afterwards, qualitatively.
b_agg_df = pd.DataFrame()

# Aggregate every numeric bureau feature at the client (SK_ID_CURR) level.
for feature in num_feats_b:
    b_agg_df = f.get_aggregate_features_num(b_df, b_agg_df, feature, 'SK_ID_CURR')
    # Single-record groups get NaN std (and mean == median there); use 0.
    b_agg_df[feature + '_std'] = np.where(
        b_agg_df[feature + '_std'].isna()
        & (b_agg_df[feature + '_mean'] == b_agg_df[feature + '_median']),
        0,
        b_agg_df[feature + '_std'])

# Per-category counts for each categorical bureau feature: one
# '<feature>_<value>_count' column per distinct category value, aligned on
# the SK_ID_CURR index via .xs (so the index must NOT be reset yet).
for feature in cat_feats_b:
    b_agg_cat = b_df.groupby('SK_ID_CURR')[feature].value_counts()
    for i in b_df[feature].unique():
        # NOTE(review): this loop body was truncated in the original
        # source; reconstructed from the identical previous-application
        # block elsewhere in this file — confirm against it.
        # Assumes category values `i` are strings — TODO confirm.
        col = feature + '_' + i + '_count'
        b_agg_df[col] = b_agg_cat.xs(key=i, level=1)
        # Clients with no bureau rows for this category genuinely have a
        # count of zero, hence fill NA with 0. Assign rather than chained
        # inplace fillna, which may silently operate on a copy.
        b_agg_df[col] = b_agg_df[col].fillna(0)

# Promote SK_ID_CURR from the index to the first column, then renumber.
# BUG FIX: the original called reset_index(drop=True) without inplace or
# assignment, discarding the result; done effectively here, after the
# index-aligned categorical counts above.
b_agg_df.insert(0, 'SK_ID_CURR', b_agg_df.index)
b_agg_df.reset_index(drop=True, inplace=True)
# Features are aggregated at the SK_ID_CURR level so the previous-
# application data lines up with the loan application at client level.
# Correcting individual columns before aggregation could lose information,
# so we aggregate first and apply data correction (missing values,
# outliers, etc.) per feature afterwards, qualitatively.
p_agg_df = pd.DataFrame()

# Aggregate every numeric previous-application feature at the client level.
for feature in num_feats_p:
    p_agg_df = f.get_aggregate_features_num(p_df, p_agg_df, feature, 'SK_ID_CURR')
    # Single-record groups get NaN std (and mean == median there); use 0.
    p_agg_df[feature + '_std'] = np.where(
        p_agg_df[feature + '_std'].isna()
        & (p_agg_df[feature + '_mean'] == p_agg_df[feature + '_median']),
        0,
        p_agg_df[feature + '_std'])

# Per-category counts for each categorical feature: one
# '<feature>_<value>_count' column per distinct category value, aligned on
# the SK_ID_CURR index via .xs (so the index must NOT be reset yet).
for feature in cat_feats_p:
    p_agg_cat = p_df.groupby('SK_ID_CURR')[feature].value_counts()
    # Assumes category values `i` are strings — TODO confirm.
    for i in p_df[feature].unique():
        col = feature + '_' + i + '_count'
        p_agg_df[col] = p_agg_cat.xs(key=i, level=1)
        # NA means the client has no rows for this category, i.e. a true
        # count of zero. Assign rather than chained inplace fillna, which
        # may silently operate on a copy.
        p_agg_df[col] = p_agg_df[col].fillna(0)

# Promote SK_ID_CURR from the index to the first column, then renumber.
# BUG FIX: the original called reset_index(drop=True) without inplace or
# assignment, discarding the result; done effectively here, after the
# index-aligned categorical counts above.
p_agg_df.insert(0, 'SK_ID_CURR', p_agg_df.index)
p_agg_df.reset_index(drop=True, inplace=True)