def __init__(self, list_file, category_emb, field_size):
    """Load a Criteo-style dataset and keep it in memory as tensors.

    The raw data is obtained from
    ``data_preprocess.read_criteo_data(list_file, category_emb)``; the
    returned mapping is read under the keys ``'feature_sizes'``,
    ``'index'``, ``'value'`` and ``'label'``.

    :param list_file: first argument forwarded to ``read_criteo_data``
    :param category_emb: second argument forwarded to ``read_criteo_data``
    :param field_size: size of the middle axis the index array is
        reshaped to, i.e. ``(-1, field_size, 1)``
    """
    parsed = data_preprocess.read_criteo_data(list_file, category_emb)
    self.feature_size = parsed['feature_sizes']

    raw_index, raw_value, raw_label = (
        parsed['index'], parsed['value'], parsed['label'])

    # Sample count is taken from the labels before any array conversion.
    self.length = len(raw_label)

    # Indices get an explicit trailing axis of length 1; values and labels
    # keep whatever shape np.array infers for them.
    index_arr = np.array(raw_index).reshape((-1, field_size, 1))
    value_arr = np.array(raw_value)
    label_arr = np.array(raw_label)

    self.Xi = torch.LongTensor(index_arr)
    self.Xv = torch.FloatTensor(value_arr)
    self.Y = torch.FloatTensor(label_arr)

    print('dataset size: ', self.Xi.shape, self.Xv.shape, self.Y.shape)
# -*- coding:utf-8 -*-
from utils import data_preprocess
from model import DeepFM
import torch

# Train and test splits are parsed against the same category embedding file.
_CATEGORY_EMB_CSV = './data/category_emb.csv'
result_dict = data_preprocess.read_criteo_data(
    './data/tiny_train_input.csv', _CATEGORY_EMB_CSV)
test_dict = data_preprocess.read_criteo_data(
    './data/tiny_test_input.csv', _CATEGORY_EMB_CSV)

# Keyword configuration for the model, collected in one place.
_MODEL_OPTIONS = dict(
    verbose=True,
    use_cuda=True,
    weight_decay=0.0001,
    use_fm=True,
    use_ffm=False,
    use_deep=True,
)

# Everything below runs with CUDA device index 2 selected as current device.
with torch.cuda.device(2):
    # 39 is the first positional argument of DeepFM -- presumably the number
    # of feature fields; confirm against the DeepFM signature.
    deepfm = DeepFM.DeepFM(
        39, result_dict['feature_sizes'], **_MODEL_OPTIONS).cuda()

    # NOTE: 'ealry_stopping' is spelled exactly as the original call passes
    # it; do not "correct" it without checking the keyword fit() declares.
    deepfm.fit(
        result_dict['index'], result_dict['value'], result_dict['label'],
        test_dict['index'], test_dict['value'], test_dict['label'],
        ealry_stopping=True,
        refit=True,
    )
from models.models_online_deep.deepfm_adam import DeepFMAdam
from models.models_online_deep.deepfm_onn import DeepFMOnn
from models.models_online_deep.nfm_adam import NFMAdam
from models.models_online_deep.nfm_onn import NFMOnn
from models.models_online_deep.fm_adam import FMAdam

########################################################################################################################
# save path
########################################################################################################################
# Both output directories live under the current working directory.
_working_dir = os.getcwd()
save_log = _working_dir + '/performance/save_log/'
save_model = _working_dir + '/performance/save_model/'

########################################################################################################################
# dataset setup
########################################################################################################################
_TRAIN_CSV = 'dataset/criteo/tiny_train_input.csv'
_CATEGORY_EMB_CSV = 'dataset/criteo/category_emb.csv'

train_dict = data_preprocess.read_criteo_data(_TRAIN_CSV, _CATEGORY_EMB_CSV)
train_dict_size = train_dict['size']

num_batchdata = 2500
num_batch = 10

# data_config selects how the batch lists are built: the string "Iteration"
# routes to create_ten_iter, any int routes to create_dataset.
data_config = "Iteration"
# data_config = 3

# NOTE(review): if data_config is neither "Iteration" nor an int, none of the
# four batch lists below is defined.
if data_config == "Iteration":
    (batch_train_Xi_list,
     batch_train_Xv_list,
     batch_train_Y_list,
     ratio_list) = data_preprocess.create_ten_iter(
        _TRAIN_CSV, _CATEGORY_EMB_CSV, num_batch, num_batchdata)
elif isinstance(data_config, int):
    (batch_train_Xi_list,
     batch_train_Xv_list,
     batch_train_Y_list,
     ratio_list) = data_preprocess.create_dataset(
        _TRAIN_CSV, _CATEGORY_EMB_CSV,
        int(num_batch / data_config), num_batch, num_batchdata)
:param y: tensor of labels :return: metric of the evaluation """ y_pred = self.inner_predict_proba(Xi, Xv) return self.eval_metric(y.cpu().data.numpy(), y_pred) """ test part """ if __name__ == '__main__': import sys sys.path.append('../') from utils import data_preprocess result_dict = data_preprocess.read_criteo_data('../data/train.csv', '../data/category_emb.csv') test_dict = data_preprocess.read_criteo_data('../data/test.csv', '../data/category_emb.csv') with torch.cuda.device(0): dcn = DCN(39, result_dict['feature_sizes'], batch_size=128 * 32, verbose=True, use_cuda=True, weight_decay=0.00002, use_inner_product=True).cuda() dcn.fit(result_dict['index'], result_dict['value'], result_dict['label'], test_dict['index'], test_dict['value'],
""" :param Xi: tensor of feature index :param Xv: tensor of feature value :param y: tensor of labels :return: metric of the evaluation """ y_pred = self.inner_predict_proba(Xi, Xv) return self.eval_metric(y.cpu().data.numpy(), y_pred) """ test part """ import sys sys.path.append('../') from utils import data_preprocess result_dict = data_preprocess.read_criteo_data('../data/train.csv', '../data/category_emb.csv') test_dict = data_preprocess.read_criteo_data('../data/test.csv', '../data/category_emb.csv') with torch.cuda.device(0): din = DIN(39, result_dict['feature_sizes'], batch_size=128 * 64, is_shallow_dropout=False, verbose=True, use_cuda=True, weight_decay=0.0000002, use_fm=True, use_ffm=False, use_high_interaction=True,interation_type=False).cuda() # din.fit(result_dict['index'], result_dict['value'], result_dict['label'], # test_dict['index'], test_dict['value'], test_dict['label'], ealry_stopping=True, pre_train=True, # n_epochs=32,refit=False, # save_path='../data/model/din.pkl') din.load_state_dict(torch.load('../data/model/din.pkl')) din.fit(result_dict['index'], result_dict['value'], result_dict['label'], test_dict['index'], test_dict['value'], test_dict['label'], ealry_stopping=True, pre_train=False, n_epochs=64, refit=False, save_path='../data/model/din.pkl')
import os
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import roc_auc_score
from time import time
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from utils import data_preprocess

# Ad-hoc inspection script: parse the tiny training split and print the
# first 'value' row and the length of the first 'index' row.
rootDir = 'E:/conf_test/dnn_ctr/data'
_train_csv = rootDir + '/tiny_train_input.csv'
_category_emb_csv = rootDir + '/category_emb.csv'

result_dict = data_preprocess.read_criteo_data(_train_csv, _category_emb_csv)
#test_dict = data_preprocess.read_criteo_data(rootDir + '/tiny_test_input.csv', rootDir + '/category_emb.csv')
# print(result_dict['index'])
# print('**********************')

_first_value_row = result_dict['value'][0]
print(_first_value_row)

index_list = result_dict['index']
_first_index_row = index_list[0]
print(len(_first_index_row))
# -*- coding:utf-8 -*-
from utils import data_preprocess
from model import DeepFM
import torch

# Train and test splits are parsed against the same category embedding file.
_CATEGORY_EMB_CSV = 'data/category_emb.csv'
result_dict = data_preprocess.read_criteo_data(
    'data/tiny_train_input.csv', _CATEGORY_EMB_CSV)
test_dict = data_preprocess.read_criteo_data(
    'data/tiny_test_input.csv', _CATEGORY_EMB_CSV)

# Positional inputs for fit(): training triple first, then the test triple.
_train_inputs = (
    result_dict['index'], result_dict['value'], result_dict['label'])
_test_inputs = (
    test_dict['index'], test_dict['value'], test_dict['label'])

# Model construction and training both run with CUDA device 0 selected.
with torch.cuda.device(0):
    # 39 is the first positional argument of DeepFM -- presumably the number
    # of feature fields; confirm against the DeepFM signature.
    deepfm = DeepFM.DeepFM(
        39,
        result_dict['feature_sizes'],
        verbose=True,
        use_cuda=True,
        weight_decay=0.0001,
        use_fm=True,
        use_ffm=False,
        use_deep=True,
    ).cuda()

    # NOTE: 'ealry_stopping' is spelled exactly as the original call passes
    # it; do not "correct" it without checking the keyword fit() declares.
    deepfm.fit(
        _train_inputs[0], _train_inputs[1], _train_inputs[2],
        _test_inputs[0], _test_inputs[1], _test_inputs[2],
        ealry_stopping=True,
        refit=True,
    )

# no gpu
# deepfm = DeepFM.DeepFM(