Exemplo n.º 1
0
# Feature names passed to features_sel_del through `exclude`.
excluded_features = [
    'data__daily__data_vol_mb__median__max',
    'data__daily__cell_count__min',
    'voice__hourly__voice_dur_min__sum__max',
    'voice__daily__voice_dur_min__sum__median',
    'data__daily__data_vol_mb__sum__min',
    'data__daily__data_vol_mb__avg__avg',
    'voice__hourly__voice_dur_min__median__max',
    'voice__hourly__voice_dur_min__sum__min',
    'voice__daily__cell_count__median',
    'voice__daily__voice_dur_min__sum__max',
    'voice__cell_lac_id__count',
    'data__daily__data_vol_mb__max__min',
    'voice__voice_dur_min__days__count',
    'voice__daily__voice_dur_min__avg__max',
    'data__daily__cell_count__median',
    'voice__hourly__cell_count__avg',
    'voice__hourly__cell_count__median',
    'data__hourly__data_vol_mb__avg__median',
    'voice__hourly__voice_dur_min__sum__avg',
    'data__hourly__data_vol_mb__median__max',
    'data__hourly__data_vol_mb__avg__max',
    'voice__voice_dur_min__avg',
    'data__daily__data_vol_mb__sum__avg',
]

# Feature names passed to features_sel_del through `columns`.
candidate_columns = [
    'voice__voice_dur_min__max',
    'voice__daily__voice_dur_min__sum__min',
    'voice__daily__cell_count__avg',
    'voice__daily__voice_dur_min__avg__median',
    'data__daily__data_vol_mb__median__avg',
    'voice__voice_dur_min__sum',
    'voice__hourly__voice_dur_min__median__avg',
    'voice__hourly__cell_count__max',
    'data__daily__data_vol_mb__avg__median',
    'data__daily__cell_count__max',
    'data__daily__data_vol_mb__median__min',
    'voice__voice_dur_min__hours__count',
    'voice__daily__voice_dur_min__max__avg',
    'voice__hourly__voice_dur_min__median__median',
    # The names below are commented out, so they are NOT passed in this run.
    # 'voice__hourly__voice_dur_min__avg__max',
    # 'voice__daily__voice_dur_min__max__min',
    # 'data__daily__data_vol_mb__max__max',
    # 'data__data_vol_mb__avg',
    # 'data__hourly__data_vol_mb__median__min',
    # 'data__cell_lac_id__count',
    # 'data__hourly__cell_count__avg',
    # 'data__hourly__data_vol_mb__max__median',
    # 'data__data_vol_mb__sum',
    # 'voice__daily__voice_dur_min__median__max',
    # 'data__daily__data_vol_mb__sum__median',
    # 'data__hourly__cell_count__min',
    # 'voice__hourly__voice_dur_min__max__min',
    # 'data__hourly__data_vol_mb__median__avg',
    # 'voice__hourly__voice_dur_min__median__min',
    # 'data__hourly__data_vol_mb__sum__min',
    # 'data__daily__data_vol_mb__max__median',
    # 'voice__daily__voice_dur_min__median__median',
    # 'voice__daily__voice_dur_min__avg__avg',
    # 'data__data_vol_mb__median',
    # 'data__daily__data_vol_mb__sum__max',
    # 'data__data_vol_mb__hours__count',
    # 'data__daily__data_vol_mb__median__median',
    # 'voice__hourly__cell_count__min',
    # 'data__daily__cell_count__avg',
    # 'data__hourly__cell_count__median',
    # 'data__hourly__data_vol_mb__sum__avg',
    # 'voice__daily__voice_dur_min__max__median',
    # 'voice__hourly__voice_dur_min__max__avg',
    # 'data__daily__data_vol_mb__avg__min',
    # 'data__daily__data_vol_mb__max__avg',
    # 'data__hourly__data_vol_mb__sum__max',
    # 'data__data_vol_mb__max',
    # 'voice__hourly__voice_dur_min__max__max',
    # 'voice__hourly__voice_dur_min__sum__median',
    # 'voice__hourly__voice_dur_min__max__median',
    # 'data__hourly__data_vol_mb__avg__avg',
    # 'data__data_vol_mb__days__count',
    # 'voice__daily__cell_count__min',
    # 'data__hourly__data_vol_mb__max__avg',
    # 'voice__daily__voice_dur_min__median__min',
    # 'voice__hourly__voice_dur_min__avg__median',
    # 'voice__daily__voice_dur_min__avg__min',
    # 'data__hourly__data_vol_mb__avg__min',
    # 'data__hourly__data_vol_mb__max__max',
    # 'voice__hourly__voice_dur_min__avg__avg',
    # 'voice__daily__voice_dur_min__max__max',
    # 'data__hourly__data_vol_mb__median__median',
    # 'data__hourly__data_vol_mb__sum__median',
    # 'voice__voice_dur_min__median',
    # 'data__hourly__data_vol_mb__max__min',
    # 'data__hourly__cell_count__max',
    # 'voice__hourly__voice_dur_min__avg__min',
    # 'voice__daily__voice_dur_min__median__avg',
    # 'voice__daily__voice_dur_min__sum__avg',
    # 'data__daily__data_vol_mb__avg__max',
    # 'voice__daily__cell_count__max',
]

# NOTE(review): the second positional argument (23) matches the "data23" in
# the log file name -- presumably a data-set id; confirm against
# QCV.features_sel_del. `cv` and `model_id` must already be defined.
cv.features_sel_del(
    model_id,
    23,
    early_stop_cv=lambda x: x < 0.557,  # minmax
    log_file='workdir/logs/data23_sub_cols3.txt',
    exclude=excluded_features,
    columns=candidate_columns,
)
Exemplo n.º 2
0
from hyperopt import hp, fmin, tpe
import os
import sys

sys.path.insert(0, os.getcwd())
import workdir.classes.config
from qml.cv import QCV
from qml.models import QXgb, QAvg, QAvgOneModelData
from workdir.classes.models import qm

# QCV instance built around the imported `qm` object.
cv = QCV(qm)

# Disabled alternative, kept for reference: register a QXgb model instead.
# model_id = qm.add_by_params(
#     QXgb(
#         alpha=1.0, booster="gbtree", colsample_bylevel=0.7,
#         colsample_bytree=0.8, eta=0.004, eval_metric="logloss",
#         gamma=0.2, max_depth=4, num_boost_round=2015,
#         objective="binary:logistic", subsample=0.8, tree_method="hist",
#     ),
#     'hyperopt xgb', level=-1,
# )

# Add a QAvgOneModelData(416, 2) entry at level -2; the value returned by
# add_by_params is what gets handed to features_sel_del below.
averaged_model = QAvgOneModelData(416, 2)
model_id = qm.add_by_params(averaged_model, level=-2)

# Both `exclude` and `columns` are passed as empty lists in this run.
cv.features_sel_del(
    model_id, 13,
    early_stop_cv=lambda x: x < 0.53,  # minmax
    log_file='workdir/logs/data13_sub_cols2.txt',
    exclude=[], columns=[],
)
Exemplo n.º 3
0
import datetime
import numpy as np

from hyperopt import hp, fmin, tpe
import os
import sys
sys.path.insert(0, os.getcwd())
import workdir.classes.config
from qml.cv import QCV
from qml.models import QXgb, QAvg, QAvgOneModelData
from workdir.classes.models import qm



# QCV instance built around the imported `qm` object.
cv = QCV(qm)

# Disabled alternative, kept for reference: register a QXgb model instead.
# model_id = qm.add_by_params(
#     QXgb(
#         alpha=1.0, booster="gbtree", colsample_bylevel=0.7,
#         colsample_bytree=0.8, eta=0.004, eval_metric="logloss",
#         gamma=0.2, max_depth=4, num_boost_round=2015,
#         objective="binary:logistic", subsample=0.8, tree_method="hist",
#     ),
#     'hyperopt xgb', level=-1,
# )

# Add a QAvgOneModelData(416, 2) entry at level -2; the value returned by
# add_by_params is what gets handed to features_sel_del below.
averaged_model = QAvgOneModelData(416, 2)
model_id = qm.add_by_params(averaged_model, level=-2)

# NOTE(review): the early-stop predicate here uses ">" and the log file is
# named feat19.txt while the second argument is 3 -- reproduced unchanged;
# confirm both are intentional.
cv.features_sel_del(
    model_id,
    3,
    early_stop_cv=lambda x: x > 0.53,
    log_file='workdir/logs/feat19.txt',
    exclude=['category_2_82'],
)


Exemplo n.º 4
0
import datetime
import numpy as np

from hyperopt import hp, fmin, tpe
import os
import sys
sys.path.insert(0, os.getcwd())
import workdir.classes.config
from qml.cv import QCV
from qml.models import QXgb, QAvg, QAvgOneModelData
from workdir.classes.models import qm



# QCV instance built around the imported `qm` object.
cv = QCV(qm)

# QXgb configured with explicit keyword arguments (same names and values
# as the original dict that was unpacked into the call).
xgb_model = QXgb(
    alpha=1.0,
    booster="gbtree",
    colsample_bylevel=0.7,
    colsample_bytree=0.8,
    eta=0.004,
    eval_metric="logloss",
    gamma=0.2,
    max_depth=4,
    num_boost_round=2015,
    objective="binary:logistic",
    subsample=0.8,
    tree_method="hist",
)
xgb_model_id = qm.add_by_params(xgb_model, 'hyperopt xgb', level=-1)

# Wrap the value returned for the QXgb entry in QAvgOneModelData(..., 3) and
# add that at level -2; this second result is what `model_id` ends up as.
model_id = qm.add_by_params(QAvgOneModelData(xgb_model_id, 3), level=-2)

# NOTE(review): the log file is named feat19.txt while the second argument
# is 66 -- reproduced unchanged; confirm it is intentional.
cv.features_sel_del(
    model_id,
    66,
    early_stop_cv=lambda x: x > 0.5414,
    log_file='workdir/logs/feat19.txt',
    exclude=[],
)


Exemplo n.º 5
0
# Feature names passed to features_sel_del through `columns`.
selected_columns = [
    'com_cat_2__last__cat_45',
    'itc__last',
    'com_cat_2__last__cat_44',
    'com_cat_2__last__cat_43',
    'com_cat_2__last__cat_42',
    'com_cat_2__last__cat_41',
    'com_cat_2__last__cat_40',
    'com_cat_2__last__cat_39',
    'com_cat_2__last__cat_38',
    'com_cat_2__last__cat_37',
    'com_cat_2__last__cat_36',
    'arpu_group__04',
    'com_cat_23__last',
    'com_cat_21__last',
    'com_cat_30__max',
    'com_cat_29__median',
    'com_cat_2__last__cat_51',
    'com_cat_2__last__cat_50',
    'com_cat_2__last__cat_49',
    'com_cat_2__last__cat_48',
    'com_cat_2__last__cat_47',
    'com_cat_2__last__cat_46',
    'sum_data_min__avg__mon1',
    'com_cat_26__sum_zero',
    'com_cat_26__2',
    'com_cat_26__1',
    'com_cat_25__sum_zero',
    'com_cat_24__sum_not3112',
    'com_cat_2__last__cat_9',
    'com_cat_2__last__cat_13',
    'com_cat_2__last__cat_12',
    'com_cat_2__last__cat_11',
    'com_cat_2__last__cat_10',
    'com_cat_22__sum',
    'com_cat_2__last__cat_7',
    'com_cat_2__last__cat_6',
    'com_cat_2__last__cat_5',
    'com_cat_2__last__cat_4',
    'com_cat_2__last__cat_3',
    'com_cat_2__last__cat_2',
    'com_cat_2__last__cat_1',
    'sum_data_min__avg__mon2',
    'com_cat_3__10',
    'com_cat_27__last',
    'com_cat_34__last',
    'com_cat_2__last',
    'com_cat_3__09',
    'arpu_group__02',
    'arpu_group__03',
    'arpu_group__10',
    'sum_minutes__sum__mon3',
    'com_cat_33__sum',
    'com_cat_2__last__cat_20',
    'com_cat_2__last__cat_19',
    'com_cat_2__last__cat_18',
    'com_cat_2__last__cat_17',
    'com_cat_2__last__cat_16',
    'com_cat_2__last__cat_15',
    'device_type_id__4',
    'cell_lac_count',
    'com_cat_2__last__cat_21',
    'com_cat_7__8',
    'com_cat_3__last__cat_7',
    'arpu_group__last__cat_4.0',
    'rent_channel__median',
    'com_cat_31__median',
    'com_cat_3__last__cat_15',
    'com_cat_28__sum',
    'com_cat_1__cat_4',
    'com_cat_3__last__cat_9',
]

# NOTE(review): the second positional argument (19) matches the "data19" in
# the log file name -- presumably a data-set id; confirm against
# QCV.features_sel_del. `cv` and `model_id` must already be defined.
# `exclude` is passed as an empty list in this run.
cv.features_sel_del(
    model_id,
    19,
    early_stop_cv=lambda x: x < 0.557,  # minmax
    log_file='workdir/logs/data19_sub_cols1.txt',
    exclude=[],
    columns=selected_columns,
)