Пример #1
0
if __name__ == '__main__':
    # Load the raw frames, then read the sample submission and zero out its
    # second column.
    train_df, test_df = fe.read_df()
    sub_df = pd.read_csv('data/sample_submit.csv', header=None)
    sub_df.iloc[:, 1] = np.zeros(len(sub_df))

    # Run the same preprocessing over both frames (train first, then test).
    train_df, test_df = preprocess(train_df), preprocess(test_df)

    # Model inputs and the label column.
    features = [
        'duration',
        'goal_min',
        'goal_max',
        'country_encoding',
        'category1_encoding',
        'category2_encoding',
    ]
    target = 'state'

    # Cross-validation setup: fold count, per-fold splits, and run settings.
    cv = 10
    train_dfs, valid_dfs, test_dfs = training.cv(train_df, cv)
    scores = []
    params = None
    tune = True
    name = 'target_encoding'

    for cv_idx in range(cv):
        # --- prepare: slice this fold's frames into inputs and labels ---
        fold_train = train_dfs[cv_idx]
        x_train = fold_train[features]
        y_train = fold_train[target]

        fold_valid = valid_dfs[cv_idx]
        x_valid = fold_valid[features]
        y_valid = fold_valid[target]

        fold_test = test_dfs[cv_idx]
        x_test = fold_test[features]
        y_test = fold_test[target]

        # Add the baseline group feature to every split and to the
        # submission inputs; the training labels are passed alongside.
        x_train, x_valid, x_test, x_sub = add_group_feature(
            x_train,
            x_valid,
            x_test,
            test_df[features],
            y_train,
        )

        # Column positions 3-5 line up with the three '*_encoding' entries of
        # `features`, assuming add_group_feature keeps the column order
        # -- TODO confirm.
        lgb_train, lgb_valid = (
            lgb.Dataset(
                x_train,
                y_train,
                categorical_feature=[3, 4, 5],
                free_raw_data=False,
            ),
            lgb.Dataset(
                x_valid,
                y_valid,
                categorical_feature=[3, 4, 5],
                free_raw_data=False,
            ),
        )
Пример #2
0
    # Fragment of a cross-validation driver whose enclosing header is not
    # visible here. This variant builds its inputs with fe.bert_feature; the
    # preprocess()-based path is left commented out below.
    # train_df = preprocess(train_df)
    train_feature = fe.bert_feature(train_df)
    # Attach the label column so it is carried through the CV split below and
    # can be read back per fold via `target`.
    train_feature['state'] = train_df.state
    test_feature = fe.bert_feature(test_df)
    # test_df = preprocess(test_df)
    # features = [
    #     'html_content'
    # ]
    # Every column of the test feature frame is a model input; no 'state'
    # column is assigned to test_feature in this fragment.
    features = list(test_feature.columns)
    target = 'state'

    # Report the shapes of both feature frames.
    print(train_feature.shape, test_feature.shape)
    
    # Fold count, per-fold splits from training.cv, and run settings.
    cv = 10
    train_dfs, valid_dfs, test_dfs = training.cv(train_feature, cv)
    scores = list()
    params = None
    tune = True
    name = 'bert_feature'
    for cv_idx in range(cv):
        # The bare string below is a section marker with no runtime effect.
        'prepare'
        # Slice this fold's train/valid/test frames into inputs and labels.
        x_train, y_train = train_dfs[cv_idx][features], train_dfs[cv_idx][target]
        x_valid, y_valid = valid_dfs[cv_idx][features], valid_dfs[cv_idx][target]
        x_test, y_test = test_dfs[cv_idx][features], test_dfs[cv_idx][target]
        
        # Wrap the train and validation splits as LightGBM datasets; the raw
        # data is kept (free_raw_data=False).
        lgb_train, lgb_valid = lgb.Dataset(x_train, y_train, free_raw_data=False), lgb.Dataset(x_valid, y_valid, free_raw_data=False)

        # The bare string below is a section marker with no runtime effect.
        'train'
        # With tune set (True above), params is replaced each fold by the
        # result of training.tuning.
        # NOTE(review): the meaning of the third argument (100) is not visible
        # here -- confirm against training.tuning.
        if tune:
            params = training.tuning(lgb_train, lgb_valid, 100)