def run_base_model_nfm(dfTrain, dfTest, folds, pnn_params):
    """Train one NFM model per cross-validation fold.

    Builds the feature dictionary and parses the train/test frames once,
    then fits a fresh NFM on each (train_idx, valid_idx) pair from `folds`.
    NOTE: `pnn_params` is mutated in place with the derived
    'feature_size' and 'field_size' entries before model construction.
    """
    feat_dict = FeatureDictionary(dfTrain=dfTrain,
                                  dfTest=dfTest,
                                  numeric_cols=config.NUMERIC_COLS,
                                  ignore_cols=config.IGNORE_COLS)
    parser = DataParser(feat_dict=feat_dict)
    # Xi_*: per-row feature index lists; Xv_*: the matching feature values.
    Xi_train, Xv_train, y_train = parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = parser.parse(df=dfTest)

    pnn_params['feature_size'] = feat_dict.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])

    def _select(seq, indices):
        # Gather the elements of `seq` at the given positions.
        return [seq[pos] for pos in indices]

    for fold_no, (train_idx, valid_idx) in enumerate(folds):
        Xi_tr = _select(Xi_train, train_idx)
        Xv_tr = _select(Xv_train, train_idx)
        y_tr = _select(y_train, train_idx)
        Xi_va = _select(Xi_train, valid_idx)
        Xv_va = _select(Xv_train, valid_idx)
        y_va = _select(y_train, valid_idx)

        nfm = NFM(**pnn_params)
        nfm.fit(Xi_tr, Xv_tr, y_tr, Xi_va, Xv_va, y_va)
def run_base_model_nfm(dfTrain, dfTest, folds, pnn_params):
    """Run cross-validated NFM training.

    For each fold in `folds`, slices the parsed training data into
    train/validation parts and fits a new NFM model.
    NOTE: `pnn_params` is mutated in place ('feature_size', 'field_size').

    Fixes vs. original: removed the leftover debug `print(dfTrain.dtypes)`,
    replaced the name-bound lambda with a `def` (PEP 8 E731), and dropped
    the unused `enumerate` index on the fold loop.
    """
    fd = FeatureDictionary(dfTrain=dfTrain,
                           dfTest=dfTest,
                           numeric_cols=config.NUMERIC_COLS,
                           ignore_cols=config.IGNORE_COLS)
    data_parser = DataParser(feat_dict=fd)
    # Xi_train: per-row feature index lists; Xv_train: the matching values.
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, ids_test = data_parser.parse(df=dfTest)

    pnn_params['feature_size'] = fd.feat_dim
    pnn_params['field_size'] = len(Xi_train[0])

    def _get(x, idx_list):
        # Row-gather helper: pick the elements of x at the given indices.
        return [x[i] for i in idx_list]

    for train_idx, valid_idx in folds:
        Xi_train_ = _get(Xi_train, train_idx)
        Xv_train_ = _get(Xv_train, train_idx)
        y_train_ = _get(y_train, train_idx)
        Xi_valid_ = _get(Xi_train, valid_idx)
        Xv_valid_ = _get(Xv_train, valid_idx)
        y_valid_ = _get(y_train, valid_idx)

        nfm = NFM(**pnn_params)
        nfm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_)
def plot_nfm():
    """Build an NFM model on a small slice of the Criteo data and save its
    architecture diagram to ./imgs/NFM.png.

    Fixes vs. original: dropped the unused `enumerate` index in the feature
    comprehensions, built the feature-column lists once instead of twice,
    and renamed the misleading local `history` (it holds the model).
    """
    # Read the data and the dense/sparse column names.
    data, dense_features, sparse_features = read_criteo_data()
    # Keep only a few columns so the rendered graph stays readable.
    dense_features = dense_features[:3]
    sparse_features = sparse_features[:2]

    # Mark sparse columns with SparseFeat and dense ones with DenseFeat.
    sparse_columns = [
        SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
        for feat in sparse_features
    ]
    dense_columns = [DenseFeat(feat, 1) for feat in dense_features]

    # The linear part and the DNN part use the same feature grouping here
    # (in a real scenario they may be chosen differently).
    linear_feature_columns = sparse_columns + dense_columns
    dnn_feature_columns = sparse_columns + dense_columns

    # Build the NFM model and plot its architecture.
    model = NFM(linear_feature_columns, dnn_feature_columns)
    keras.utils.plot_model(model, to_file="./imgs/NFM.png", show_shapes=True)
""" if args.model_type == 'bprmf': model = BPRMF(data_config=config, pretrain_data=pretrain_data, args=args) elif args.model_type == 'cke': model = CKE(data_config=config, pretrain_data=pretrain_data, args=args) elif args.model_type in ['cfkg']: model = CFKG(data_config=config, pretrain_data=pretrain_data, args=args) elif args.model_type in ['nfm', 'fm']: model = NFM(data_config=config, pretrain_data=pretrain_data, args=args) elif args.model_type in ['kgat']: model = KGAT(data_config=config, pretrain_data=pretrain_data, args=args) saver = tf.train.Saver() """ ********************************************************* Save the model parameters. """ if args.save_flag == 1: if args.model_type in ['bprmf', 'cke', 'fm', 'cfkg']: weights_save_path = '%sweights/%s/%s/l%s_r%s' % ( args.weights_path, args.dataset, model.model_type, str(
# =============== 参数设置 =============== sample_num = 200000 # 取部分数据进行测试 test_size = 0.2 k = 8 dropout = 0.5 reg = 1e-4 # =============== 准备数据 =============== dense_feature = ['I' + str(i) for i in range(1, 14)] sparse_feature = ['C' + str(i) for i in range(1, 27)] embed_dict, train_df, test_df = preprocess(args.file_path, sample_num, test_size) embed_num = list(embed_dict.values()) dense_dim = len(dense_feature) hidden_units = [dense_dim + k, 256, 128, 64] train_dataset = NFMDataset(train_df, dense_feature, sparse_feature) train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True) # =============== 创建模型 =============== NFM_model = NFM(embed_num, k, dense_dim, hidden_units, dropout) loss_func = nn.BCELoss() optimizer = optim.Adam(NFM_model.parameters(), lr=args.learning_rate, weight_decay=reg) # =============== 模型训练与测试 =============== train(NFM_model, args.epochs, train_loader, loss_func, optimizer) test(NFM_model, test_df, dense_feature, sparse_feature)
# Optional CLI argument: number of rows of the data file to load.
# Fix vs. original: collapsed the two-step str->int conversion and reused
# the otherwise-unused `path` variable in the data_load call.
nrows = None
if len(sys.argv) > 1:
    nrows = int(sys.argv[1])

if __name__ == '__main__':
    path = '../data/data.csv'
    feature_size, data = data_loader.data_load(path, nrows=nrows)
    features = ['userId', 'movieId', 'tag']
    # 80/20 split point over the rows.
    num = data.shape[0] * 4 // 5
    model = NFM(features,
                feature_size,
                embedding_size=8,
                layers=[200, 200, 200],
                verbose=False)
    X = data[features].values
    y = data.label.values.reshape(-1, 1)
    # NOTE(review): example fit call kept (disabled) for reference.
    '''
    model.fit(
        X[:num], y[:num],
        epoch=20,
        X_valid=X[num:], y_valid=y[num:],
        early_stopping=True,
        refit=True
    )
    '''
    import time
    start = time.time()
    # NOTE(review): excerpt ends here — the timed work follows outside
    # this chunk.
def __init__(self, dataset_name, model_name, model_dir, arg_file, **argc):
    """Load (or adopt) a trained recommender model by name.

    If a ready-made model/session pair is passed via **argc, it is adopted
    directly; otherwise the named model is constructed and restored from
    the checkpoint found under `model_dir`.

    # NOTE(review): `model_dir` is used with the `/` operator, so it is
    # presumably a pathlib.Path — confirm against callers.
    """
    self.dataset_name = dataset_name
    self.model_name = model_name
    self.model_dir = model_dir
    self.arg_file = arg_file
    if 'model' in argc:
        # Caller supplies an already-built model and TF session.
        self.model = argc['model']
        self.sess = argc['sess']
    else:
        if model_name == 'kgat':
            sys.path.append(KGAT_PATH)
            from KGAT import KGAT
            from utility.loader_kgat import KGAT_loader
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True  # don't grab all GPU memory up front
            self.sess = tf.Session(config=config)
            self.args = self.build_args()
            data = KGAT_loader(args=self.args,
                               path='data/{}'.format(dataset_name))
            # `config` is reused: TF ConfigProto above, model data-config here.
            config = self.build_config(data)
            self.model = KGAT(data_config=config, pretrain_data=None,
                              args=self.args)
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname(model_dir / 'checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                # Initialize first so variables missing from the checkpoint
                # still have values, then restore the saved weights.
                self.sess.run(tf.global_variables_initializer())
                saver.restore(self.sess, ckpt.model_checkpoint_path)
                # Refresh KGAT's attention-weighted adjacency after restore.
                self.model.update_attentive_A(self.sess)
        elif model_name == 'cke':
            sys.path.append(KGAT_PATH)
            from CKE import CKE
            from utility.loader_cke import CKE_loader
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config)
            self.args = self.build_args()
            data = CKE_loader(args=self.args,
                              path='data/{}'.format(dataset_name))
            config = self.build_config(data)
            self.model = CKE(data_config=config, pretrain_data=None,
                             args=self.args)
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname(model_dir / 'checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                self.sess.run(tf.global_variables_initializer())
                saver.restore(self.sess, ckpt.model_checkpoint_path)
        elif model_name == 'ripple':
            sys.path.append(RIPPLE_PATH)
            from ripple_model import RippleNet
            from ripple_data_loader import load_data as ld
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config)
            self.args = self.build_args()
            self.args.dataset = dataset_name
            self.loader = ld(self.args)
            # NOTE(review): RippleNet sizes come from the caller-supplied
            # data object, not from self.loader — confirm this is intended.
            data = argc['data']
            self.model = RippleNet(self.args, data.n_entities,
                                   data.n_relations)
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname(model_dir / 'checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                self.sess.run(tf.global_variables_initializer())
                saver.restore(self.sess, ckpt.model_checkpoint_path)
        elif model_name == 'cfkg':
            sys.path.append(KGAT_PATH)
            from CFKG import CFKG
            from utility.loader_cfkg import CFKG_loader
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config)
            self.args = self.build_args()
            data = CFKG_loader(args=self.args,
                               path='data/{}'.format(dataset_name))
            config = self.build_config(data)
            self.model = CFKG(data_config=config, pretrain_data=None,
                              args=self.args)
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname(model_dir / 'checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                self.sess.run(tf.global_variables_initializer())
                saver.restore(self.sess, ckpt.model_checkpoint_path)
        elif model_name == 'nfm':
            sys.path.append(KGAT_PATH)
            from NFM import NFM
            from utility.loader_nfm import NFM_loader
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config)
            self.args = self.build_args()
            data = NFM_loader(args=self.args,
                              path='data/{}'.format(dataset_name))
            # Unlike cke/cfkg above, NFM keeps its loader for later use.
            self.loader = data
            config = self.build_config(data)
            self.model = NFM(data_config=config, pretrain_data=None,
                             args=self.args)
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname(model_dir / 'checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                self.sess.run(tf.global_variables_initializer())
                saver.restore(self.sess, ckpt.model_checkpoint_path)
        elif model_name == 'EKGCN_torch':
            # PyTorch variant: restore from a single checkpoint file instead
            # of a TF checkpoint directory.
            from model import Model
            self.device = torch.device('cuda:{}'.format(argc['gpu_id']))
            self.model = Model.load_checkpoint(self.model_dir / 'model.pt',
                                               self.device).to(self.device)
            self.user_score = {}  # cached per-user scores
        elif model_name in ['EKGCN_s', 'EKGCN_g', 'EKGCN_n', 'EKGCN']:
            from EKGCN import EKGCN
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config)
            self.args = self.build_args()
            # EKGCN also takes the caller-supplied data object.
            data = argc['data']
            self.model = EKGCN(self.args, data, sess=self.sess)
            data.get_full_kg()
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(
                os.path.dirname(model_dir / 'checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                self.sess.run(tf.global_variables_initializer())
                print('>>> restore from {}'.format(
                    ckpt.model_checkpoint_path))
                saver.restore(self.sess, ckpt.model_checkpoint_path)
                # Refresh EKGCN's adjacency after restoring weights.
                self.model.update_A(self.sess)
                    # NOTE(review): excerpt starts mid-call — this closes an
                    # argparse add_argument() begun outside this chunk.
                    help='decay rate', type=float, default=0.99)
# args=[] means CLI input is ignored and defaults are used (notebook-style run).
args = parser.parse_args(args=[])

# load data set
X_train_cate, X_train_cont, y_train, X_test_cate, X_test_cont, y_test, cate_list = load_dataset(
    args.input_dir)
# Feature counts: categorical vs. continuous columns.
cate_num = X_train_cate.shape[1]
cont_num = X_train_cont.shape[1]

# Start from a clean TF1 graph before building the model.
tf.reset_default_graph()
with tf.Session() as sess:
    # define model
    model = NFM.NFM(args, cate_num, cont_num, cate_list)
    model.build()
    # Resume from a checkpoint if one exists; otherwise start fresh.
    ckpt = tf.train.get_checkpoint_state(
        os.path.join(args.input_dir, args.model_name))
    if ckpt:
        print('Loading model parameters from %s' % ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        print('Creating model with inital parameters')
        sess.run(tf.global_variables_initializer())
    step = 0
    for epoch in range(args.epoch):
        start_time = time.time()
        # NOTE(review): excerpt truncated here — the epoch body continues
        # outside this chunk.