def parse_dummy_list(self, dlist):
    """Evaluate a list literal that may contain empty slots.

    Empty slots (``,,`` or a leading ``[,``) are filled with ``None``
    before the text is handed to ``safe_eval``. The substitution is purely
    textual, so it also rewrites matching sequences that happen to sit
    inside quoted strings.

    Returns whatever ``safe_eval`` produces, or ``[]`` when it raises
    ``SyntaxError``; any other exception propagates to the caller.
    """
    patched = dlist
    # A single pass can leave a fresh ",," behind (",,," -> ",None,,"),
    # so keep substituting until no empty slot remains.
    while "[," in patched or ",," in patched:
        patched = patched.replace(",,", ",None,")
        patched = patched.replace("[,", "[None,")

    try:
        parsed = safe_eval(patched)
    except SyntaxError:
        return []
    return parsed
# Example #2
# 0
 def parse_dummy_list(self, dlist):
     """Fill empty list slots with ``None`` and evaluate the result.

     ``dlist`` is list-literal text in which elements may be missing
     (``,,`` or ``[,``). Each gap is rewritten to hold ``None`` and the
     patched text is passed to ``safe_eval``. The rewrite is a plain text
     replacement and does not distinguish quoted strings from structure.

     Returns the value from ``safe_eval``, or ``[]`` if it raises
     ``SyntaxError``; other exceptions are not caught.
     """
     fills = ((",,", ",None,"), ("[,", "[None,"))
     text = dlist
     # Repeat until stable: replacing ",," in ",,," yields ",None,,",
     # which still contains a gap.
     while any(gap in text for gap, _ in fills):
         for gap, filler in fills:
             text = text.replace(gap, filler)
     try:
         result = safe_eval(text)
     except SyntaxError:
         result = []
     return result
# Column name -> type-name string, grouped under the 'train' and 'inference'
# keys. The 'train' group holds the same three columns as 'inference' plus
# 'deal_probability'.
# NOTE(review): values look like numpy/pandas dtype names -- confirm at the
# place where this mapping is consumed.
COLUMN_TYPES = {
    'train': {
        'price': 'float64',
        'item_seq_number': 'uint32',
        'image_top_1': 'float64',
        'deal_probability': 'float32',
    },
    'inference': {
        'price': 'float64',
        'item_seq_number': 'uint32',
        'image_top_1': 'float64',
    },
}

SOLUTION_CONFIG = AttrDict({
    'env': {'cache_dirpath': params.experiment_dir
            },
    'random_search': {'light_gbm': {'n_runs': safe_eval(params.lgbm_random_search_runs),
                                    'callbacks': {'neptune_monitor': {'name': 'light_gbm'
                                                                      },
                                                  'save_results': {'filepath': os.path.join(params.experiment_dir,
                                                                                            'random_search_light_gbm.pkl')
                                                                   }
                                                  }
                                    }
                      },
    'dataframe_by_type_splitter': {'numerical_columns': NUMERICAL_COLUMNS,
                                   'categorical_columns': CATEGORICAL_COLUMNS,
                                   'timestamp_columns': TIMESTAMP_COLUMNS,
                                   },

    'groupby_aggregation': {'groupby_aggregations': [
        {'groupby': ['user_id'], 'select': 'price', 'agg': 'mean'},
# Example #4
# 0
        'ip': 'uint32',
        'app': 'uint16',
        'device': 'uint16',
        'os': 'uint16',
        'channel': 'uint16',
        'click_id': 'uint32'
    }
}

SOLUTION_CONFIG = AttrDict({
    'env': {
        'cache_dirpath': params.experiment_dir
    },
    'random_search': {
        'light_gbm': {
            'n_runs': safe_eval(params.lgbm_random_search_runs),
            'callbacks': {
                'neptune_monitor': {
                    'name': 'light_gbm'
                },
                'save_results': {
                    'filepath':
                    os.path.join(params.experiment_dir,
                                 'random_search_light_gbm.pkl')
                }
            }
        },
        'xgboost': {
            'n_runs': safe_eval(params.xgboost_random_search_runs),
            'callbacks': {
                'neptune_monitor': {
# Single-entry column-name lists.
ID_COLUMNS = ['SK_ID_CURR']
TARGET_COLUMNS = ['TARGET']

# Same value as int(10e4): the float literal 10e4 is 1e5, i.e. 100000.
DEV_SAMPLE_SIZE = 100000

SOLUTION_CONFIG = AttrDict({
    'env': {
        'cache_dirpath': params.experiment_dir
    },
    'dataframe_by_type_splitter': {
        'numerical_columns': NUMERICAL_COLUMNS,
        'categorical_columns': CATEGORICAL_COLUMNS,
        'timestamp_columns': TIMESTAMP_COLUMNS,
    },
    'light_gbm': {
        'boosting_type': safe_eval(params.lgbm__boosting_type),
        'objective': safe_eval(params.lgbm__objective),
        'metric': safe_eval(params.lgbm__metric),
        'learning_rate': safe_eval(params.lgbm__learning_rate),
        'max_depth': safe_eval(params.lgbm__max_depth),
        'subsample': safe_eval(params.lgbm__subsample),
        'colsample_bytree': safe_eval(params.lgbm__colsample_bytree),
        'min_child_weight': safe_eval(params.lgbm__min_child_weight),
        'reg_lambda': safe_eval(params.lgbm__reg_lambda),
        'reg_alpha': safe_eval(params.lgbm__reg_alpha),
        'subsample_freq': safe_eval(params.lgbm__subsample_freq),
        'max_bin': safe_eval(params.lgbm__max_bin),
        'min_child_samples': safe_eval(params.lgbm__min_child_samples),
        'num_leaves': safe_eval(params.lgbm__num_leaves),
        'nthread': safe_eval(params.num_workers),
        'number_boosting_rounds':
# Single-entry column-name lists.
ID_COLUMNS = ['SK_ID_CURR']
TARGET_COLUMNS = ['TARGET']

# Same value as int(10e4): the float literal 10e4 is 1e5, i.e. 100000.
DEV_SAMPLE_SIZE = 100000

SOLUTION_CONFIG = AttrDict({
    'env': {'cache_dirpath': params.experiment_dir
            },

    'dataframe_by_type_splitter': {'numerical_columns': NUMERICAL_COLUMNS,
                                   'categorical_columns': CATEGORICAL_COLUMNS,
                                   'timestamp_columns': TIMESTAMP_COLUMNS,
                                   },

    'light_gbm': {'boosting_type': safe_eval(params.lgbm__boosting_type),
                  'objective': safe_eval(params.lgbm__objective),
                  'metric': safe_eval(params.lgbm__metric),
                  'learning_rate': safe_eval(params.lgbm__learning_rate),
                  'max_depth': safe_eval(params.lgbm__max_depth),
                  'subsample': safe_eval(params.lgbm__subsample),
                  'colsample_bytree': safe_eval(params.lgbm__colsample_bytree),
                  'min_child_weight': safe_eval(params.lgbm__min_child_weight),
                  'reg_lambda': safe_eval(params.lgbm__reg_lambda),
                  'reg_alpha': safe_eval(params.lgbm__reg_alpha),
                  'subsample_freq': safe_eval(params.lgbm__subsample_freq),
                  'max_bin': safe_eval(params.lgbm__max_bin),
                  'min_child_samples': safe_eval(params.lgbm__min_child_samples),
                  'num_leaves': safe_eval(params.lgbm__num_leaves),
                  'nthread': safe_eval(params.num_workers),
                  'number_boosting_rounds': safe_eval(params.lgbm__number_boosting_rounds),
                          'channel': 'uint16',
                          'is_attributed': 'uint8',
                          },
                'inference': {'ip': 'uint32',
                              'app': 'uint16',
                              'device': 'uint16',
                              'os': 'uint16',
                              'channel': 'uint16',
                              'click_id': 'uint32'
                              }
                }

SOLUTION_CONFIG = AttrDict({
    'env': {'cache_dirpath': params.experiment_dir
            },
    'random_search': {'light_gbm': {'n_runs': safe_eval(params.lgbm_random_search_runs),
                                    'callbacks': {'neptune_monitor': {'name': 'light_gbm'
                                                                      },
                                                  'save_results': {'filepath': os.path.join(params.experiment_dir,
                                                                                            'random_search_light_gbm.pkl')
                                                                   }
                                                  }
                                    },
                      'xgboost': {'n_runs': safe_eval(params.xgboost_random_search_runs),
                                  'callbacks': {'neptune_monitor': {'name': 'xgboost'
                                                                    },
                                                'save_results': {'filepath': os.path.join(params.experiment_dir,
                                                                                          'random_search_xgboost.pkl')
                                                                 }
                                                }
                                  },