Exemplo n.º 1
0
def tiny_robot(x_train, y_train, x_valid, rf_rp, ext_rp, xgb_rp,
               **robot_kwargs):
    """Run a single round of cross-validated stacking on the full train set.

    The inputs are passed through ``numerize`` and the resulting training
    rows are split into stacking folds; every split is handed to
    ``Cvs.stack_that`` together with the supplied model parameters.

    Args:
        x_train: training features, forwarded to ``numerize``.
        y_train: training targets, forwarded to ``Cvs.stack_that``.
        x_valid: validation features, forwarded to ``numerize``.
        rf_rp, ext_rp, xgb_rp: model parameters forwarded unchanged to
            ``Cvs.stack_that``.
        **robot_kwargs: forwarded to ``numerize``.  Keys read here:
            ``robot_cv_stack`` (number of stacking folds, default 5) and
            ``robot_nb_auto_max`` (stop after this many stacking folds;
            the default -1 never matches, so every fold runs).

    Returns:
        A one-element list whose single entry is the list of
        ``Cvs.stack_that`` results, one per stacking fold that was run.
    """
    robot_cv_stack = robot_kwargs.get('robot_cv_stack', 5)
    robot_nb_auto_max = robot_kwargs.get('robot_nb_auto_max', -1)

    logging.info(" >>> DGH >>> Feature cleaning")
    x_train_num, x_valid_num = numerize(x_train, x_valid, **robot_kwargs)

    stack_res = []
    logging.info(" >>> DGH >>> Cross-val-stacking")
    for train1_idx, stack_idx in tqdm(KFold(len(x_train_num),
                                            n_folds=robot_cv_stack,
                                            shuffle=True),
                                      nested=True,
                                      desc='cv2'):
        y_probas = Cvs.stack_that(x_train_num, y_train, x_valid_num,
                                  train1_idx, stack_idx, rf_rp, ext_rp, xgb_rp)
        stack_res.append(y_probas)
        # Honour the fold limit only after the fold's result is recorded,
        # so a limit of 1 still returns that first fold.
        if len(stack_res) == robot_nb_auto_max:
            break

    # The fold results are reported exactly once.  Appending inside the loop
    # would store the very same list object once per fold.
    return [stack_res]
Exemplo n.º 2
0
def robot(x_train, y_train, x_valid, rf_ip, ext_ip, xgb_ip, **robot_kwargs):
    """Run the three-level nested cross-validation and collect the results.

    The training rows are split three times, each level nested in the
    previous one:

    1. feature folds -- the held-out rows go to ``chaosize`` together with
       the remaining training rows and ``x_valid``; its output is passed
       through ``numerize``;
    2. hopt folds -- the held-out rows feed ``Cvs.get_best_sklopt``,
       ``Cvs.get_best_etopt`` and ``Cvs.get_best_xgbopt``, whose results
       are post-processed by ``Misc.enhance_param``;
    3. stacking folds -- every split is handed to ``Cvs.stack_that``.

    Args:
        x_train: training features, indexed positionally via ``.iloc``.
        y_train: training targets, indexed positionally via ``.iloc``.
        x_valid: validation features, forwarded to ``chaosize``.
        rf_ip, ext_ip, xgb_ip: third argument of the respective
            ``Cvs.get_best_*opt`` call.
        **robot_kwargs: forwarded to ``chaosize``, ``numerize`` and
            ``Misc.enhance_param``.  Keys read here (with defaults):
            ``robot_cv_feat`` (6), ``robot_cv_hopt`` (6),
            ``robot_cv_stack`` (5), ``robot_nb_auto_max`` (-1) and
            ``robot_rand_state`` (42).

    Returns:
        A list with one entry per completed hopt fold; each entry is the
        list of ``Cvs.stack_that`` results for that fold's stacking splits.
        The function returns early once the number of entries equals
        ``robot_nb_auto_max`` (the default -1 never matches).
    """
    option = robot_kwargs.get
    n_feat_folds = option('robot_cv_feat', 6)
    n_hopt_folds = option('robot_cv_hopt', 6)
    n_stack_folds = option('robot_cv_stack', 5)
    max_runs = option('robot_nb_auto_max', -1)
    seed = option('robot_rand_state', 42)

    runs = []

    feat_folds = KFold(len(x_train),
                       n_folds=n_feat_folds,
                       shuffle=True,
                       random_state=seed)
    for fit_rows, feat_rows in feat_folds:
        x_fit, y_fit = x_train.iloc[fit_rows], y_train.iloc[fit_rows]
        x_feat, y_feat = x_train.iloc[feat_rows], y_train.iloc[feat_rows]

        logging.info(" >>> DGH >>> Chaos feature generation")
        # NOTE(review): x_valid is rebound to chaosize's output, so the next
        # feature fold receives the already-transformed frame -- confirm
        # that this is intended.
        x_fit, x_valid = chaosize(x_feat, x_fit, x_valid, y_feat,
                                  **robot_kwargs)

        logging.info(" >>> DGH >>> Feature cleaning")
        x_train_num, x_valid_num = numerize(x_fit, x_valid, **robot_kwargs)

        hopt_folds = KFold(len(x_train_num),
                           n_folds=n_hopt_folds,
                           shuffle=True,
                           random_state=seed)
        for stack_rows, hopt_rows in hopt_folds:
            x_stack = x_train_num.iloc[stack_rows]
            y_stack = y_fit.iloc[stack_rows]
            x_hopt = x_train_num.iloc[hopt_rows]
            y_hopt = y_fit.iloc[hopt_rows]

            logging.info(" >>> DGH >>> Looking for hopt parameters")
            best_rf = Cvs.get_best_sklopt(x_hopt, y_hopt, rf_ip)
            rf_rp = Misc.enhance_param(best_rf, **robot_kwargs)
            best_ext = Cvs.get_best_etopt(x_hopt, y_hopt, ext_ip)
            ext_rp = Misc.enhance_param(best_ext, **robot_kwargs)
            best_xgb = Cvs.get_best_xgbopt(x_hopt, y_hopt, xgb_ip)
            xgb_rp = Misc.enhance_param(best_xgb, **robot_kwargs)

            logging.info(" >>> DGH >>> Cross-val-stacking")
            # No random_state here: the stacking splits are not tied to
            # robot_rand_state.
            stack_folds = KFold(len(x_stack),
                                n_folds=n_stack_folds,
                                shuffle=True)
            runs.append([
                Cvs.stack_that(x_stack, y_stack, x_valid_num,
                               fit3_rows, held_rows, rf_rp, ext_rp, xgb_rp)
                for fit3_rows, held_rows in stack_folds
            ])

            # len(runs) is the number of completed hopt folds.
            if len(runs) == max_runs:
                return runs
    return runs
Exemplo n.º 3
0
def robot(x_train, y_train, x_valid, rf_ip, ext_ip, xgb_ip, **robot_kwargs):
    """Nested cross-validation: feature generation, parameter search, stacking.

    For every feature fold the held-out rows are given to ``chaosize`` (with the remaining
    training rows and ``x_valid``) and the output is cleaned by ``numerize``.  The cleaned
    training rows are then split into hopt folds: the held-out part drives the three
    ``Cvs.get_best_*opt`` searches, and the rest is split once more into stacking folds that
    are passed one by one to ``Cvs.stack_that``.

    Args:
        x_train: training features, sliced positionally with ``.iloc``.
        y_train: training targets, sliced positionally with ``.iloc``.
        x_valid: validation features, forwarded to ``chaosize``.
        rf_ip, ext_ip, xgb_ip: third argument of ``Cvs.get_best_sklopt``,
            ``Cvs.get_best_etopt`` and ``Cvs.get_best_xgbopt`` respectively.
        **robot_kwargs: forwarded to ``chaosize``, ``numerize`` and ``Misc.enhance_param``.
            Keys read here (defaults in parentheses): ``robot_cv_feat`` (6),
            ``robot_cv_hopt`` (6), ``robot_cv_stack`` (5), ``robot_nb_auto_max`` (-1),
            ``robot_rand_state`` (42).

    Returns:
        List of per-hopt-fold results; each item is the list of ``Cvs.stack_that`` outputs
        of that fold.  Returns early as soon as the list length equals
        ``robot_nb_auto_max``; the default -1 never triggers this.
    """
    cv_feat = robot_kwargs.get('robot_cv_feat', 6)
    cv_hopt = robot_kwargs.get('robot_cv_hopt', 6)
    cv_stack = robot_kwargs.get('robot_cv_stack', 5)
    run_limit = robot_kwargs.get('robot_nb_auto_max', -1)
    rand_state = robot_kwargs.get('robot_rand_state', 42)

    collected = []

    outer_cv = KFold(len(x_train), n_folds=cv_feat, shuffle=True, random_state=rand_state)
    for keep_idx, feat_idx in outer_cv:
        x_keep, y_keep = x_train.iloc[keep_idx], y_train.iloc[keep_idx]
        x_feat, y_feat = x_train.iloc[feat_idx], y_train.iloc[feat_idx]

        logging.info("Chaos feature generation")
        # NOTE(review): x_valid is overwritten with chaosize's output, so later feature folds
        # start from the already-transformed frame -- confirm this is intended.
        x_keep, x_valid = chaosize(x_feat, x_keep, x_valid, y_feat, **robot_kwargs)

        logging.info("Feature cleaning")
        x_train_num, x_valid_num = numerize(x_keep, x_valid, **robot_kwargs)

        middle_cv = KFold(len(x_train_num), n_folds=cv_hopt, shuffle=True,
                          random_state=rand_state)
        for fit_idx, hopt_idx in middle_cv:
            x_fit, y_fit = x_train_num.iloc[fit_idx], y_keep.iloc[fit_idx]
            x_hopt, y_hopt = x_train_num.iloc[hopt_idx], y_keep.iloc[hopt_idx]

            logging.info("Looking for hopt parameters")
            rf_best = Cvs.get_best_sklopt(x_hopt, y_hopt, rf_ip)
            rf_rp = Misc.enhance_param(rf_best, **robot_kwargs)
            ext_best = Cvs.get_best_etopt(x_hopt, y_hopt, ext_ip)
            ext_rp = Misc.enhance_param(ext_best, **robot_kwargs)
            xgb_best = Cvs.get_best_xgbopt(x_hopt, y_hopt, xgb_ip)
            xgb_rp = Misc.enhance_param(xgb_best, **robot_kwargs)

            logging.info("Starting cross-val-stacking")
            # The stacking split is built without random_state.
            inner_cv = KFold(len(x_fit), n_folds=cv_stack, shuffle=True)
            fold_probas = [
                Cvs.stack_that(x_fit, y_fit, x_valid_num, sub_idx, stack_idx,
                               rf_rp, ext_rp, xgb_rp)
                for sub_idx, stack_idx in inner_cv
            ]
            collected.append(fold_probas)

            # One entry per completed hopt fold, so the length doubles as the run counter.
            if len(collected) == run_limit:
                return collected
    return collected
Exemplo n.º 4
0
def small_robot(x_train, y_train, x_valid, rf_rp, ext_rp, xgb_rp,
                **robot_kwargs):
    """Run feature folds and stacking folds with fixed model parameters.

    For every feature fold the held-out rows are given to ``chaosize``
    (with the remaining training rows and ``x_valid``), the output is
    passed through ``numerize``, and the cleaned training rows are split
    into stacking folds that are handed one by one to ``Cvs.stack_that``.
    Both loops are wrapped in ``tqdm`` progress bars ('cv1' and 'cv2').

    Args:
        x_train: training features, sliced positionally with ``.iloc``.
        y_train: training targets, sliced positionally with ``.iloc``.
        x_valid: validation features, forwarded to ``chaosize``.
        rf_rp, ext_rp, xgb_rp: model parameters forwarded unchanged to
            ``Cvs.stack_that``.
        **robot_kwargs: forwarded to ``chaosize`` and ``numerize``.  Keys
            read here (with defaults): ``robot_cv_feat`` (6),
            ``robot_cv_stack`` (5), ``robot_nb_auto_max`` (-1) and
            ``robot_rand_state`` (42).

    Returns:
        A list with one entry per completed feature fold; each entry is
        the list of ``Cvs.stack_that`` results of that fold.  Returns
        early once the number of entries equals ``robot_nb_auto_max``
        (the default -1 never matches).
    """
    option = robot_kwargs.get
    n_feat_folds = option('robot_cv_feat', 6)
    n_stack_folds = option('robot_cv_stack', 5)
    max_runs = option('robot_nb_auto_max', -1)
    seed = option('robot_rand_state', 42)

    runs = []

    feat_folds = KFold(len(x_train),
                       n_folds=n_feat_folds,
                       shuffle=True,
                       random_state=seed)
    for fit_rows, feat_rows in tqdm(feat_folds, desc='cv1'):
        x_fit, y_fit = x_train.iloc[fit_rows], y_train.iloc[fit_rows]
        x_feat, y_feat = x_train.iloc[feat_rows], y_train.iloc[feat_rows]

        logging.info(" >>> DGH >>> Chaos feature generation")
        # NOTE(review): x_valid is rebound to chaosize's output, so the next
        # feature fold receives the already-transformed frame -- confirm
        # that this is intended.
        x_fit, x_valid = chaosize(x_feat, x_fit, x_valid, y_feat,
                                  **robot_kwargs)

        logging.info(" >>> DGH >>> Feature cleaning")
        x_train_num, x_valid_num = numerize(x_fit, x_valid, **robot_kwargs)

        logging.info(" >>> DGH >>> Cross-val-stacking")
        # No random_state here: the stacking splits are not tied to
        # robot_rand_state.
        stack_folds = KFold(len(x_train_num),
                            n_folds=n_stack_folds,
                            shuffle=True)
        runs.append([
            Cvs.stack_that(x_train_num, y_fit, x_valid_num,
                           sub_rows, held_rows, rf_rp, ext_rp, xgb_rp)
            for sub_rows, held_rows in tqdm(stack_folds,
                                            nested=True,
                                            desc='cv2')
        ])

        # len(runs) is the number of completed feature folds.
        if len(runs) == max_runs:
            return runs
    return runs
Exemplo n.º 5
0
def small_robot(x_train, y_train, x_valid, rf_rp, ext_rp, xgb_rp, **robot_kwargs):
    """Two-level cross-validation (features, then stacking) with fixed model parameters.

    Each feature fold hands its held-out rows to ``chaosize`` together with the remaining
    training rows and ``x_valid``; the output goes through ``numerize`` and the cleaned
    training rows are split into stacking folds for ``Cvs.stack_that``.

    Args:
        x_train: training features, sliced positionally with ``.iloc``.
        y_train: training targets, sliced positionally with ``.iloc``.
        x_valid: validation features, forwarded to ``chaosize``.
        rf_rp, ext_rp, xgb_rp: model parameters forwarded unchanged to ``Cvs.stack_that``.
        **robot_kwargs: forwarded to ``chaosize`` and ``numerize``.  Keys read here (defaults
            in parentheses): ``robot_cv_feat`` (6), ``robot_cv_stack`` (5),
            ``robot_nb_auto_max`` (-1), ``robot_rand_state`` (42).

    Returns:
        List with one item per completed feature fold, each item being the list of
        ``Cvs.stack_that`` outputs of that fold.  Returns early as soon as the list length
        equals ``robot_nb_auto_max``; the default -1 never triggers this.
    """
    cv_feat = robot_kwargs.get('robot_cv_feat', 6)
    cv_stack = robot_kwargs.get('robot_cv_stack', 5)
    run_limit = robot_kwargs.get('robot_nb_auto_max', -1)
    rand_state = robot_kwargs.get('robot_rand_state', 42)

    collected = []

    outer_cv = KFold(len(x_train), n_folds=cv_feat, shuffle=True, random_state=rand_state)
    for keep_idx, feat_idx in outer_cv:
        x_keep, y_keep = x_train.iloc[keep_idx], y_train.iloc[keep_idx]
        x_feat, y_feat = x_train.iloc[feat_idx], y_train.iloc[feat_idx]

        logging.info("Chaos feature generation")
        # NOTE(review): x_valid is overwritten with chaosize's output, so later feature folds
        # start from the already-transformed frame -- confirm this is intended.
        x_keep, x_valid = chaosize(x_feat, x_keep, x_valid, y_feat, **robot_kwargs)

        logging.info("Feature cleaning")
        x_train_num, x_valid_num = numerize(x_keep, x_valid, **robot_kwargs)

        logging.info("Starting cross-val-stacking")
        # The stacking split is built without random_state.
        inner_cv = KFold(len(x_train_num), n_folds=cv_stack, shuffle=True)
        fold_probas = [
            Cvs.stack_that(x_train_num, y_keep, x_valid_num, sub_idx, stack_idx,
                           rf_rp, ext_rp, xgb_rp)
            for sub_idx, stack_idx in inner_cv
        ]
        collected.append(fold_probas)

        # One entry per completed feature fold, so the length doubles as the run counter.
        if len(collected) == run_limit:
            return collected
    return collected