def build_model(
    self,
    embedding_dim=4,
    task='binary',
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
    device='cpu',
):
    # Method of a model-builder class (the class definition is elided).
    # Assumes: from deepctr_torch.inputs import SparseFeat, VarLenSparseFeat
    #          from deepctr_torch.models import DeepFM
    # Fixed-length sparse feature columns, one per known categorical feature.
    fixlen_feature_columns = [
        SparseFeat(
            feat,
            vocabulary_size=self.vocabulary_size_dict[feat],
            embedding_dim=embedding_dim,
        )
        for feat in self.sparse_features
    ]
    # Variable-length (multi-valued) features are pooled with a 'mean' combiner.
    if self.variable_length_features:
        varlen_feature_columns = [
            VarLenSparseFeat(
                SparseFeat(
                    feat,
                    vocabulary_size=self.vocabulary_size_dict[feat],
                    embedding_dim=embedding_dim,
                ),
                maxlen=self.variable_length_features_max_len[feat],
                combiner='mean',
            )
            for feat in self.variable_length_features
        ]
    else:
        varlen_feature_columns = []
    # DeepFM uses the same columns for its linear (wide) and DNN (deep) parts.
    linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
    dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
    model = DeepFM(linear_feature_columns, dnn_feature_columns, task=task, device=device)
    model.compile(optimizer, loss, metrics)
    return model
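# Illustrative usage sketch (not part of the original code): `builder` stands for an
# instance of the enclosing class with sparse_features, vocabulary_size_dict,
# variable_length_features and variable_length_features_max_len already populated;
# train_model_input / train_labels are the usual {feature_name: column} dict and
# target array used in the snippets below.
model = builder.build_model(embedding_dim=8, task='binary', device='cpu')
model.fit(train_model_input, train_labels, batch_size=256, epochs=5,
          verbose=2, validation_split=0.2)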
# Assumes test helpers (get_test_data, check_model, SAMPLE_SIZE) imported from the
# surrounding test utilities, plus: from deepctr_torch.models import DeepFM
def test_DeepFM(use_fm, hidden_size, sparse_feature_num):
    model_name = "DeepFM"
    sample_size = SAMPLE_SIZE
    x, y, feature_columns = get_test_data(sample_size, sparse_feature_num, sparse_feature_num)
    model = DeepFM(feature_columns, feature_columns,
                   use_fm=use_fm, dnn_hidden_units=hidden_size, dnn_dropout=0.5)
    check_model(model, model_name, x, y)
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

# 3.generate input data for model
train, test = train_test_split(glowpick, test_size=0.2)
train_model_input = {name: train[name] for name in feature_names}
test_model_input = {name: test[name] for name in feature_names}

device = 'cpu'
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    device = 'cuda:0'

model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
# Restore the trained weights and switch to evaluation mode before predicting.
model.load_state_dict(torch.load(MODEL_PATH))
model.eval()  # epoch 6

from math import sqrt

pred_ans = model.predict(test_model_input, batch_size=256)
print("test MSE", round(mean_squared_error(test[target].values, pred_ans), 4))
print("\ntest RMSE", round(sqrt(mean_squared_error(test[target].values, pred_ans)), 4))

use_col = [
    'created_at', 'rating', 'origin_user_id', 'origin_product_id',
    'origin_age', 'origin_gender', 'price', 'brandName',
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

# 3.generate input data for model
train, test = train_test_split(data, test_size=0.2)
train_model_input = {name: train[name] for name in feature_names}
test_model_input = {name: test[name] for name in feature_names}

# 4.Define Model, train, predict and evaluate
device = 'cpu'
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    device = 'cuda:0'

model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression', device=device)
model.compile(
    "adam",
    "mse",
    metrics=['mse'],
)
history = model.fit(
    train_model_input,
    train[target].values,
    batch_size=256,
    epochs=10,
    verbose=2,
    validation_split=0.2,
)
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

train, test = train_test_split(data, test_size=0.2)
train_model_input = {name: train[name] for name in feature_names}
test_model_input = {name: test[name] for name in feature_names}

device = 'cpu'
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    device = 'cuda:0'

model = DeepFM(linear_feature_columns=linear_feature_columns,
               dnn_feature_columns=dnn_feature_columns,
               task='binary', l2_reg_embedding=1e-5, device=device)
model.compile(
    "adagrad",
    "binary_crossentropy",
    metrics=["binary_crossentropy", "auc"],
)
model.fit(train_model_input, train[target].values,
          batch_size=32, epochs=10, validation_split=0.0, verbose=2)
]  # closes the fixlen_feature_columns list defined in the elided context

# Notice : value 0 is for padding for sequence input feature
# NOTE: this snippet uses the older positional VarLenSparseFeat signature; recent
# deepctr_torch releases expect VarLenSparseFeat(SparseFeat(...), maxlen=..., combiner=...)
# as in build_model above.
varlen_feature_columns = [
    VarLenSparseFeat('genres', len(key2index) + 1, max_len, 'mean')
]

linear_feature_columns = fixlen_feature_columns + varlen_feature_columns
dnn_feature_columns = fixlen_feature_columns + varlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

# 3.generate input data for model
model_input = {name: data[name] for name in feature_names}
model_input['genres'] = genres_list

# 4.Define Model, compile and train
model = DeepFM(linear_feature_columns, dnn_feature_columns, task='regression')
model.compile(
    "adam",
    "mse",
    metrics=['mse'],
)
history = model.fit(
    model_input,
    data[target].values,
    batch_size=256,
    epochs=10,
    verbose=2,
    validation_split=0.2,
)
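# A minimal sketch (an assumption, not the original script) of the preprocessing the
# snippet above relies on: building key2index, genres_list and max_len by encoding the
# pipe-separated 'genres' column of `data` and post-padding every row with 0.
import numpy as np

key2index = {}

def encode_genres(value):
    # Map each genre token to a 1-based index; 0 is reserved for padding.
    keys = value.split('|')
    for key in keys:
        if key not in key2index:
            key2index[key] = len(key2index) + 1
    return [key2index[key] for key in keys]

genres_list = [encode_genres(v) for v in data['genres'].values]
max_len = max(len(row) for row in genres_list)
# Pad every sequence to max_len so the input forms a dense 2-D array.
genres_list = np.array([row + [0] * (max_len - len(row)) for row in genres_list])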
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

train, test = train_test_split(data, test_size=0.2, random_state=666)
train_model_input = {name: train[name] for name in feature_names}
test_model_input = {name: test[name] for name in feature_names}

device = 'cpu'
use_cuda = True
if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    device = 'cuda:0'

model = DeepFM(linear_feature_columns, dnn_feature_columns, task='binary', device=device)
# A torch.optim.Adam instance is passed instead of the "adam" string shortcut;
# lr and epoch come from the surrounding script.
model.compile(
    Adam(model.parameters(), lr),
    "binary_crossentropy",
    metrics=['binary_crossentropy', 'auc'],
)
history = model.fit(train_model_input, train[target].values,
                    batch_size=64, epochs=epoch, verbose=2, validation_split=0.2)
pred_ans = model.predict(test_model_input, batch_size=64)
print("test LogLoss", round(log_loss(test[target].values, pred_ans), 4))