Exemplo n.º 1
0
def run_base_model_nfm(dfTrain, dfTest, folds, pnn_params):
    """Train an NFM model with K-fold cross-validation.

    Args:
        dfTrain: training DataFrame (must contain the label column).
        dfTest: test DataFrame (parsed into feature indices/values).
        folds: iterable of (train_idx, valid_idx) row-index pairs.
        pnn_params: dict of NFM hyper-parameters; 'feature_size' and
            'field_size' are filled in here before model construction.
    """
    fd = FeatureDictionary(dfTrain=dfTrain,
                           dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS)
    data_parser = DataParser(feat_dict=fd)
    # Xi_*: per-row lists of feature indices; Xv_*: the corresponding values.
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)

    pnn_params['feature_size'] = fd.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])

    def _get(x, idx):
        """Return the elements of list x at positions idx."""
        return [x[i] for i in idx]

    for train_idx, valid_idx in folds:
        Xi_train_ = _get(Xi_train, train_idx)
        Xv_train_ = _get(Xv_train, train_idx)
        y_train_ = _get(y_train, train_idx)
        Xi_valid_ = _get(Xi_train, valid_idx)
        Xv_valid_ = _get(Xv_train, valid_idx)
        y_valid_ = _get(y_train, valid_idx)

        nfm = NFM(**pnn_params)
        nfm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)
Exemplo n.º 2
0
def run_base_model_nfm(dfTrain, dfTest, folds, pnn_params):
    """Train an NFM model with K-fold cross-validation.

    Args:
        dfTrain: training DataFrame (must contain the label column).
        dfTest: test DataFrame (parsed into feature indices/values).
        folds: iterable of (train_idx, valid_idx) row-index pairs.
        pnn_params: dict of NFM hyper-parameters; 'feature_size' and
            'field_size' are filled in here before model construction.
    """
    fd = FeatureDictionary(dfTrain=dfTrain,
                           dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS)
    data_parser = DataParser(feat_dict=fd)
    # Xi_*: per-row lists of feature indices; Xv_*: the corresponding values.
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)

    print(dfTrain.dtypes)  # debug output: column dtypes being parsed

    pnn_params['feature_size'] = fd.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])

    def _get(x, idx):
        """Return the elements of list x at positions idx."""
        return [x[i] for i in idx]

    for train_idx, valid_idx in folds:
        Xi_train_ = _get(Xi_train, train_idx)
        Xv_train_ = _get(Xv_train, train_idx)
        y_train_ = _get(y_train, train_idx)
        Xi_valid_ = _get(Xi_train, valid_idx)
        Xv_valid_ = _get(Xv_train, valid_idx)
        y_valid_ = _get(y_train, valid_idx)

        nfm = NFM(**pnn_params)
        nfm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)
Exemplo n.º 3
0
def plot_nfm():
    """Build an NFM model on a small Criteo sample and save its
    architecture diagram to ./imgs/NFM.png."""
    # Load the data and keep only a few columns to keep the plot small.
    data, dense_features, sparse_features = read_criteo_data()
    dense_features = dense_features[:3]
    sparse_features = sparse_features[:2]

    def _feature_columns():
        # Tag each column: SparseFeat for categoricals (embedding dim 4,
        # vocab sized from the data), DenseFeat for numeric columns.
        return ([
            SparseFeat(feat,
                       vocabulary_size=data[feat].nunique(),
                       embedding_dim=4) for feat in sparse_features
        ] + [DenseFeat(feat, 1) for feat in dense_features])

    # The linear part and the DNN part use the same feature grouping here;
    # build each list separately so the two parts own distinct objects.
    linear_feature_columns = _feature_columns()
    dnn_feature_columns = _feature_columns()

    # Build the NFM model and render its graph.
    model = NFM(linear_feature_columns, dnn_feature_columns)
    keras.utils.plot_model(model, to_file="./imgs/NFM.png", show_shapes=True)
Exemplo n.º 4
0
    """
    if args.model_type == 'bprmf':
        model = BPRMF(data_config=config,
                      pretrain_data=pretrain_data,
                      args=args)

    elif args.model_type == 'cke':
        model = CKE(data_config=config, pretrain_data=pretrain_data, args=args)

    elif args.model_type in ['cfkg']:
        model = CFKG(data_config=config,
                     pretrain_data=pretrain_data,
                     args=args)

    elif args.model_type in ['nfm', 'fm']:
        model = NFM(data_config=config, pretrain_data=pretrain_data, args=args)

    elif args.model_type in ['kgat']:
        model = KGAT(data_config=config,
                     pretrain_data=pretrain_data,
                     args=args)

    saver = tf.train.Saver()
    """
    *********************************************************
    Save the model parameters.
    """
    if args.save_flag == 1:
        if args.model_type in ['bprmf', 'cke', 'fm', 'cfkg']:
            weights_save_path = '%sweights/%s/%s/l%s_r%s' % (
                args.weights_path, args.dataset, model.model_type, str(
Exemplo n.º 5
0
    # =============== Parameter settings ===============
    sample_num = 200000  # use only a subset of the data for quick testing
    test_size = 0.2      # fraction of rows held out as the test split
    k = 8                # latent dimension passed to NFM below -- presumably the embedding size; confirm in NFM
    dropout = 0.5
    reg = 1e-4           # L2 regularisation, applied via Adam weight_decay

    # =============== Prepare data ===============
    # Criteo-style column names: I1..I13 dense, C1..C26 categorical.
    dense_feature = ['I' + str(i) for i in range(1, 14)]
    sparse_feature = ['C' + str(i) for i in range(1, 27)]
    embed_dict, train_df, test_df = preprocess(args.file_path, sample_num,
                                               test_size)
    embed_num = list(embed_dict.values())
    dense_dim = len(dense_feature)
    # First MLP layer is sized dense_dim + k -- looks like dense features are
    # concatenated with a k-dim interaction vector; TODO confirm in NFM.
    hidden_units = [dense_dim + k, 256, 128, 64]
    train_dataset = NFMDataset(train_df, dense_feature, sparse_feature)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)

    # =============== Build model ===============
    NFM_model = NFM(embed_num, k, dense_dim, hidden_units, dropout)
    loss_func = nn.BCELoss()
    optimizer = optim.Adam(NFM_model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=reg)

    # =============== Train and evaluate ===============
    train(NFM_model, args.epochs, train_loader, loss_func, optimizer)
    test(NFM_model, test_df, dense_feature, sparse_feature)
Exemplo n.º 6
0
# Optional CLI argument: number of rows to load from the data file.
nrows = None
if len(sys.argv) > 1:
    nrows = sys.argv[1]
    nrows = int(nrows)

if __name__ == '__main__':
    path = '../data/data.csv'

    # feature_size / data come from the project loader -- presumably the
    # per-feature vocabulary sizes and the loaded table; confirm in data_loader.
    feature_size, data = data_loader.data_load('../data/data.csv', nrows=nrows)
    features = ['userId', 'movieId', 'tag']

    # 80/20 train/validation split point (by row count).
    num = data.shape[0] * 4 // 5

    model = NFM(features,
                feature_size,
                embedding_size=8,
                layers=[200, 200, 200],
                verbose=False)

    X = data[features].values
    y = data.label.values.reshape(-1, 1)
    # The string below is dead fitting code kept by the original author
    # (a no-op expression statement); left untouched.
    '''
    model.fit(
        X[:num],y[:num], epoch=20,
        X_valid=X[num:],y_valid=y[num:],
        early_stopping=True, refit=True
    )
    '''
    import time

    # Time whatever runs next (the snippet is truncated here).
    start = time.time()
Exemplo n.º 7
0
    def __init__(self, dataset_name, model_name, model_dir, arg_file, **argc):
        """Load a trained recommender model for the requested model family.

        If a ready-made model/session is passed via ``argc`` it is adopted
        as-is. Otherwise the matching implementation is imported on demand,
        its data loader is run, and the latest TF checkpoint found under
        ``model_dir`` is restored into a fresh session.

        Args:
            dataset_name: dataset folder name under ``data/``.
            model_name: one of 'kgat', 'cke', 'ripple', 'cfkg', 'nfm',
                'EKGCN_torch', or an EKGCN variant
                ('EKGCN_s'/'EKGCN_g'/'EKGCN_n'/'EKGCN').
            model_dir: path-like directory containing the checkpoint
                (used as ``model_dir / 'checkpoint'``, so a pathlib.Path).
            arg_file: argument file path (stored; presumably consumed by
                build_args -- TODO confirm).
            **argc: may carry 'model' and 'sess' to reuse an existing model,
                'data' (for 'ripple' and the EKGCN variants) and 'gpu_id'
                (for 'EKGCN_torch').
        """
        self.dataset_name = dataset_name
        self.model_name = model_name
        self.model_dir = model_dir
        self.arg_file = arg_file
        if 'model' in argc:
            # Caller already built the model: adopt it and its session.
            self.model = argc['model']
            self.sess = argc['sess']
        else:
            if model_name == 'kgat':
                # Import KGAT lazily so other model families don't need it.
                sys.path.append(KGAT_PATH)
                from KGAT import KGAT
                from utility.loader_kgat import KGAT_loader

                # Grow GPU memory on demand instead of grabbing it all.
                config = tf.ConfigProto()
                config.gpu_options.allow_growth = True
                self.sess = tf.Session(config=config)

                self.args = self.build_args()
                data = KGAT_loader(args=self.args,
                                   path='data/{}'.format(dataset_name))
                # NOTE: `config` is rebound here from a TF ConfigProto to the
                # model's data config dict.
                config = self.build_config(data)

                self.model = KGAT(data_config=config,
                                  pretrain_data=None,
                                  args=self.args)
                saver = tf.train.Saver()
                ckpt = tf.train.get_checkpoint_state(
                    os.path.dirname(model_dir / 'checkpoint'))
                if ckpt and ckpt.model_checkpoint_path:
                    # Initialize first so any variables absent from the
                    # checkpoint still get values, then restore the rest.
                    self.sess.run(tf.global_variables_initializer())
                    saver.restore(self.sess, ckpt.model_checkpoint_path)
                # KGAT recomputes its attentive adjacency after loading.
                self.model.update_attentive_A(self.sess)
            elif model_name == 'cke':
                sys.path.append(KGAT_PATH)
                from CKE import CKE
                from utility.loader_cke import CKE_loader

                config = tf.ConfigProto()
                config.gpu_options.allow_growth = True
                self.sess = tf.Session(config=config)

                self.args = self.build_args()
                data = CKE_loader(args=self.args,
                                  path='data/{}'.format(dataset_name))
                config = self.build_config(data)

                self.model = CKE(data_config=config,
                                 pretrain_data=None,
                                 args=self.args)
                saver = tf.train.Saver()
                ckpt = tf.train.get_checkpoint_state(
                    os.path.dirname(model_dir / 'checkpoint'))
                if ckpt and ckpt.model_checkpoint_path:
                    self.sess.run(tf.global_variables_initializer())
                    saver.restore(self.sess, ckpt.model_checkpoint_path)
            elif model_name == 'ripple':
                sys.path.append(RIPPLE_PATH)
                from ripple_model import RippleNet
                from ripple_data_loader import load_data as ld

                config = tf.ConfigProto()
                config.gpu_options.allow_growth = True
                self.sess = tf.Session(config=config)

                self.args = self.build_args()
                self.args.dataset = dataset_name
                self.loader = ld(self.args)

                # RippleNet needs pre-loaded entity/relation counts from
                # the caller-supplied data object.
                data = argc['data']
                self.model = RippleNet(self.args, data.n_entities,
                                       data.n_relations)
                saver = tf.train.Saver()
                ckpt = tf.train.get_checkpoint_state(
                    os.path.dirname(model_dir / 'checkpoint'))
                if ckpt and ckpt.model_checkpoint_path:
                    self.sess.run(tf.global_variables_initializer())
                    saver.restore(self.sess, ckpt.model_checkpoint_path)

            elif model_name == 'cfkg':
                sys.path.append(KGAT_PATH)
                from CFKG import CFKG
                from utility.loader_cfkg import CFKG_loader

                config = tf.ConfigProto()
                config.gpu_options.allow_growth = True
                self.sess = tf.Session(config=config)

                self.args = self.build_args()
                data = CFKG_loader(args=self.args,
                                   path='data/{}'.format(dataset_name))
                config = self.build_config(data)

                self.model = CFKG(data_config=config,
                                  pretrain_data=None,
                                  args=self.args)
                saver = tf.train.Saver()
                ckpt = tf.train.get_checkpoint_state(
                    os.path.dirname(model_dir / 'checkpoint'))
                if ckpt and ckpt.model_checkpoint_path:
                    self.sess.run(tf.global_variables_initializer())
                    saver.restore(self.sess, ckpt.model_checkpoint_path)
            elif model_name == 'nfm':
                sys.path.append(KGAT_PATH)
                from NFM import NFM
                from utility.loader_nfm import NFM_loader

                config = tf.ConfigProto()
                config.gpu_options.allow_growth = True
                self.sess = tf.Session(config=config)

                self.args = self.build_args()
                data = NFM_loader(args=self.args,
                                  path='data/{}'.format(dataset_name))
                # Unlike kgat/cke/cfkg, the NFM loader is also kept around.
                self.loader = data
                config = self.build_config(data)

                self.model = NFM(data_config=config,
                                 pretrain_data=None,
                                 args=self.args)
                saver = tf.train.Saver()
                ckpt = tf.train.get_checkpoint_state(
                    os.path.dirname(model_dir / 'checkpoint'))
                if ckpt and ckpt.model_checkpoint_path:
                    self.sess.run(tf.global_variables_initializer())
                    saver.restore(self.sess, ckpt.model_checkpoint_path)
            elif model_name == 'EKGCN_torch':
                # PyTorch path: restore a full model object from model.pt.
                from model import Model
                self.device = torch.device('cuda:{}'.format(argc['gpu_id']))
                self.model = Model.load_checkpoint(self.model_dir / 'model.pt',
                                                   self.device).to(self.device)
                self.user_score = {}  #cached
            elif model_name in ['EKGCN_s', 'EKGCN_g', 'EKGCN_n', 'EKGCN']:
                from EKGCN import EKGCN
                config = tf.ConfigProto()
                config.gpu_options.allow_growth = True
                self.sess = tf.Session(config=config)
                self.args = self.build_args()
                data = argc['data']
                self.model = EKGCN(self.args, data, sess=self.sess)
                data.get_full_kg()
                saver = tf.train.Saver()
                ckpt = tf.train.get_checkpoint_state(
                    os.path.dirname(model_dir / 'checkpoint'))
                if ckpt and ckpt.model_checkpoint_path:
                    self.sess.run(tf.global_variables_initializer())
                    print('>>> restore from {}'.format(
                        ckpt.model_checkpoint_path))
                    saver.restore(self.sess, ckpt.model_checkpoint_path)
                # EKGCN refreshes its adjacency after restoring weights.
                self.model.update_A(self.sess)
Exemplo n.º 8
0
                        help='decay rate',
                        type=float,
                        default=0.99)
    # parse_args(args=[]) ignores the real command line and takes defaults.
    args = parser.parse_args(args=[])

    # load data set
    # Categorical / continuous feature splits for train and test, plus the
    # per-category vocabulary sizes (cate_list) -- presumably; confirm in
    # load_dataset.
    X_train_cate, X_train_cont, y_train, X_test_cate, X_test_cont, y_test, cate_list = load_dataset(
        args.input_dir)

    cate_num = X_train_cate.shape[1]  # number of categorical fields
    cont_num = X_train_cont.shape[1]  # number of continuous fields

    # Fresh TF1 graph for the model build.
    tf.reset_default_graph()
    with tf.Session() as sess:
        # define model
        model = NFM.NFM(args, cate_num, cont_num, cate_list)
        model.build()

        # Resume from a checkpoint if one exists, otherwise start fresh.
        ckpt = tf.train.get_checkpoint_state(
            os.path.join(args.input_dir, args.model_name))
        if ckpt:
            print('Loading model parameters from %s' %
                  ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print('Creating model with inital parameters')
            sess.run(tf.global_variables_initializer())

        step = 0
        # Per-epoch training loop (snippet is truncated here).
        for epoch in range(args.epoch):
            start_time = time.time()