示例#1
0
def run_base_model_dcn(dfTrain, dfTest, folds, dcn_params):
    """Fit one DCN per fold and save a checkpoint after each fit.

    Args:
        dfTrain: Training frame; used to build the feature dictionary and
            parsed with ``has_label=True``.
        dfTest: Test frame; used to build the feature dictionary and parsed,
            but the parsed test arrays are not used afterwards.
        folds: Iterable of ``(train_idx, valid_idx)`` pairs. Each element of
            an index collection is used to index the parsed training arrays.
        dcn_params: Keyword arguments for ``DCN``. Updated in place with
            ``cate_feature_size``, ``field_size`` and
            ``numeric_feature_size`` before any model is built.

    Returns:
        None.
    """
    # Checkpoint prefix handed to ``saver.save`` for every fold.
    checkpoint_prefix = 'D:/code/tensorflow_practice/recommendation/Basic-DCN-Demo/model/model'

    def _rows_at(rows, positions):
        """Return ``rows[p]`` for every ``p`` in ``positions`` as a list."""
        return [rows[p] for p in positions]

    feature_dict = FeatureDictionary(
        dfTrain,
        dfTest,
        numeric_cols=config.NUMERIC_COLS,
        ignore_cols=config.IGNORE_COLS,
        cate_cols=config.CATEGORICAL_COLS,
    )

    print(feature_dict.feat_dim)
    print(feature_dict.feat_dict)

    parser = DataParser(feat_dict=feature_dict)
    parsed_train = parser.parse(df=dfTrain, has_label=True)
    cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = parsed_train
    # The test frame is parsed as well, although nothing below reads the result.
    cate_Xi_test, cate_Xv_test, numeric_Xv_test, ids_test = parser.parse(df=dfTest)

    # Size information the model constructor needs, derived from the parsed data.
    dcn_params["cate_feature_size"] = feature_dict.feat_dim
    dcn_params["field_size"] = len(cate_Xi_train[0])
    dcn_params['numeric_feature_size'] = len(config.NUMERIC_COLS)

    for fold_no, (train_idx, valid_idx) in enumerate(folds):
        print("i", fold_no)

        # Training rows of this fold.
        tr_Xi = _rows_at(cate_Xi_train, train_idx)
        tr_Xv = _rows_at(cate_Xv_train, train_idx)
        tr_num = _rows_at(numeric_Xv_train, train_idx)
        tr_y = _rows_at(y_train, train_idx)

        # Validation rows of this fold, taken from the same training arrays.
        va_Xi = _rows_at(cate_Xi_train, valid_idx)
        va_Xv = _rows_at(cate_Xv_train, valid_idx)
        va_num = _rows_at(numeric_Xv_train, valid_idx)
        va_y = _rows_at(y_train, valid_idx)

        model = DCN(**dcn_params)

        # ``fit`` receives the fold number as its last positional argument; its
        # return value is what gets saved (presumably a TF session -- confirm
        # against DCN.fit).
        fit_result = model.fit(tr_Xi, tr_Xv, tr_num, tr_y,
                               va_Xi, va_Xv, va_num, va_y,
                               fold_no)
        model.saver.save(fit_result, checkpoint_prefix, global_step=fold_no + 1)
示例#2
0
def run_base_model_dcn(dfTrain, dfTest, folds, dcn_params):
    """Fit a fresh DCN model on every cross-validation fold.

    Args:
        dfTrain: Training frame; used to build the feature dictionary and
            parsed with ``has_label=True``.
        dfTest: Test frame; used to build the feature dictionary and parsed,
            but the parsed test arrays are not used afterwards.
        folds: Iterable of ``(train_idx, valid_idx)`` pairs whose elements
            index the parsed training arrays.
        dcn_params: Keyword arguments for ``DCN``. Updated in place with
            ``cate_feature_size``, ``field_size`` and
            ``numeric_feature_size``.

    Returns:
        None.
    """
    feature_dict = FeatureDictionary(
        dfTrain,
        dfTest,
        numeric_cols=config.NUMERIC_COLS,
        ignore_cols=config.IGNORE_COLS,
        cate_cols=config.CATEGORICAL_COLS,
    )

    print(feature_dict.feat_dim)
    print(feature_dict.feat_dict)

    parser = DataParser(feat_dict=feature_dict)
    cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = parser.parse(
        df=dfTrain, has_label=True)
    # Parsed for parity with the training frame; the results are not read below.
    cate_Xi_test, cate_Xv_test, numeric_Xv_test, ids_test = parser.parse(
        df=dfTest)

    dcn_params["cate_feature_size"] = feature_dict.feat_dim
    dcn_params["field_size"] = len(cate_Xi_train[0])
    dcn_params['numeric_feature_size'] = len(config.NUMERIC_COLS)

    # The four parsed training arrays, in the order ``fit`` expects them.
    parsed_train = (cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train)

    def _select(positions):
        """Slice every parsed training array down to ``positions``."""
        return tuple([arr[p] for p in positions] for arr in parsed_train)

    for _, (train_idx, valid_idx) in enumerate(folds):
        tr_Xi, tr_Xv, tr_num, tr_y = _select(train_idx)
        va_Xi, va_Xv, va_num, va_y = _select(valid_idx)

        model = DCN(**dcn_params)

        model.fit(tr_Xi, tr_Xv, tr_num, tr_y,
                  va_Xi, va_Xv, va_num, va_y)
示例#3
0
def run_base_model_nfm(dfTrain, dfTest, folds, pnn_params):
    """Fit a ``my_AFM`` model on every cross-validation fold.

    NOTE(review): the function name says "nfm" and the parameter says "pnn",
    but the model that is built is ``my_AFM`` -- confirm the intended naming.

    Args:
        dfTrain: Training frame; used to build the feature dictionary and
            parsed with ``has_label=True``. Its ``dtypes`` are printed.
        dfTest: Test frame; used to build the feature dictionary and parsed,
            but the parsed test arrays are not used afterwards.
        folds: Iterable of ``(train_idx, valid_idx)`` pairs whose elements
            index the parsed training arrays.
        pnn_params: Keyword arguments for ``my_AFM``. Updated in place with
            ``feature_size`` and ``field_size``.

    Returns:
        None.
    """
    def _pick(seq, positions):
        """Return ``seq[p]`` for every ``p`` in ``positions`` as a list."""
        return [seq[p] for p in positions]

    feature_dict = FeatureDictionary(
        dfTrain=dfTrain,
        dfTest=dfTest,
        numeric_cols=config.NUMERIC_COLS,
        ignore_cols=config.IGNORE_COLS,
    )
    parser = DataParser(feat_dict=feature_dict)

    # Xi_*: column indices; Xv_*: the values of those columns
    # (per the original author's notes).
    Xi_train, Xv_train, y_train = parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = parser.parse(df=dfTest)

    print(dfTrain.dtypes)

    pnn_params['feature_size'] = feature_dict.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])

    for _, (train_idx, valid_idx) in enumerate(folds):
        # Training rows of this fold.
        tr_Xi = _pick(Xi_train, train_idx)
        tr_Xv = _pick(Xv_train, train_idx)
        tr_y = _pick(y_train, train_idx)

        # Validation rows of this fold, taken from the same training arrays.
        va_Xi = _pick(Xi_train, valid_idx)
        va_Xv = _pick(Xv_train, valid_idx)
        va_y = _pick(y_train, valid_idx)

        model = my_AFM(**pnn_params)
        model.fit(tr_Xi, tr_Xv, tr_y, va_Xi, va_Xv, va_y)
示例#4
0
def run_base_model_dcn(dfTrain, dfTest, folds, dcn_params):
    """Fit a fresh DCN model on every cross-validation fold.

    Args:
        dfTrain: Training frame; used to build the feature dictionary and
            parsed with ``has_label=True``.
        dfTest: Test frame; used to build the feature dictionary and parsed,
            but the parsed test arrays are not used afterwards.
        folds: Iterable of ``(train_idx, valid_idx)`` pairs whose elements
            index the parsed training arrays.
        dcn_params: Keyword arguments for ``DCN``. Updated in place with
            ``n_cate_feature`` and ``n_field``.

    Returns:
        None.
    """
    def _subset(seq, positions):
        """Return ``seq[p]`` for every ``p`` in ``positions`` as a list."""
        return [seq[p] for p in positions]

    # Mapping between categorical features and their indices.
    feature_dict = FeatureDictionary(
        dfTrain,
        dfTest,
        numeric_cols=config.NUMERIC_COLS,
        ignore_cols=config.IGNORE_COLS,
        cate_cols=config.CATEGORICAL_COLS,
    )

    print(feature_dict.feat_dim)
    print(feature_dict.feat_dict)

    # parse() yields categorical feature indices, categorical feature values,
    # numeric features and (for the training frame) the labels.
    parser = DataParser(feat_dict=feature_dict)
    parsed_train = parser.parse(df=dfTrain, has_label=True)
    cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = parsed_train
    cate_Xi_test, cate_Xv_test, numeric_Xv_test, _ = parser.parse(df=dfTest)

    # Number of categorical features after one-hot encoding the discrete columns.
    dcn_params["n_cate_feature"] = feature_dict.feat_dim
    # Number of discrete (categorical) fields per row.
    dcn_params["n_field"] = len(cate_Xi_train[0])
    print('values', str(feature_dict.feat_dim), 'values', str(len(cate_Xi_train[0])))

    for _fold_no, (train_idx, valid_idx) in enumerate(folds):
        # Training set for this fold.
        tr_Xi = _subset(cate_Xi_train, train_idx)
        tr_Xv = _subset(cate_Xv_train, train_idx)
        tr_num = _subset(numeric_Xv_train, train_idx)
        tr_y = _subset(y_train, train_idx)

        # Validation set for this fold.
        va_Xi = _subset(cate_Xi_train, valid_idx)
        va_Xv = _subset(cate_Xv_train, valid_idx)
        va_num = _subset(numeric_Xv_train, valid_idx)
        va_y = _subset(y_train, valid_idx)

        model = DCN(**dcn_params)

        model.fit(tr_Xi, tr_Xv, tr_num, tr_y,
                  va_Xi, va_Xv, va_num, va_y)
示例#5
0
    cols = [c for c in cols if (not c in config.IGNORE_COLS)]

    X_train = dfTrain[cols].values
    y_train = dfTrain["target"].values
    X_test = dfTest[cols].values
    ids_test = dfTest["id"].values

    return dfTrain, dfTest, X_train, y_train, X_test, ids_test,


# Load the frames and derived arrays; load_data() is unpacked into six values.
dfTrain, dfTest, X_train, y_train, X_test, ids_test = load_data()
print('load_data_over')

# Build the feature dictionary from both frames, using the column lists
# declared in config.
fd = FeatureDictionary(dfTrain,
                       dfTest,
                       numeric_cols=config.NUMERIC_COLS,
                       ignore_cols=config.IGNORE_COLS,
                       cate_cols=config.CATEGORICAL_COLS)

print(fd.feat_dim)
print(fd.feat_dict)

# Parse the training frame. Note that this rebinds y_train, replacing the
# value returned by load_data() above.
data_parser = DataParser(feat_dict=fd)
cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = data_parser.parse(
    df=dfTrain, has_label=True)
# NOTE(review): the test frame is not parsed in this script; the call below is
# disabled.
# cate_Xi_test, cate_Xv_test, numeric_Xv_test, y_test, ids_test = data_parser.parse(df=dfTest)


def process(cate_ids, cate_vals, y_label):
    # feat_ins
    # print('----',len(cate_ids))
示例#6
0


    for i, (train_idx, valid_idx) in enumerate(folds):
        Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx)

        afm = my_AFM(**pnn_params)
        afm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)


# Load data; load_data() is unpacked into seven values here.
dfTrain, dfTest, X_train, y_train, X_test, ids_test, cat_features_indices = load_data()

# Build the feature dictionary from both frames, using the numeric and ignored
# column lists declared in config.
fd = FeatureDictionary(dfTrain=dfTrain,
                           dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols = config.IGNORE_COLS)

data_parser = DataParser(feat_dict= fd)
# Xi_train: column indices
# Xv_train: the values of those columns
# Note: the two parse() calls rebind y_train and ids_test, replacing the values
# returned by load_data() above.
Xi_train,Xv_train,y_train = data_parser.parse(df=dfTrain,has_label=True)
Xi_test,Xv_test,ids_test = data_parser.parse(df=dfTest)

print(dfTrain.dtypes)


# Helper: collect x[i] for every index i in l, as a list.
_get = lambda x,l:[x[i] for i in l]

# ############ Randomly shuffle and split into training and validation sets
# Seed NumPy's global random state with a fixed value.
np.random.seed(2018)