Example #1
import numpy as np
import torch.optim as optim
from torch.utils.data import DataLoader, sampler

# Project-specific modules; the exact import paths are assumptions and may differ.
from dataset import CriteoDataset
from model import DeepFM


def main():
    num_train = 900

    # First num_train rows of the Criteo sample are used for training.
    train_data = CriteoDataset('./data', train=True)
    loader_train = DataLoader(train_data,
                              batch_size=100,
                              sampler=sampler.SubsetRandomSampler(
                                  range(num_train)))

    # Rows num_train..9999 of the same 10 000-row sample form the validation split.
    val_data = CriteoDataset('./data', train=True)
    loader_val = DataLoader(val_data,
                            batch_size=100,
                            sampler=sampler.SubsetRandomSampler(
                                range(num_train, 10000)))

    # Per-field cardinalities used to size the model's embedding tables.
    feature_sizes = np.loadtxt('./data/feature_sizes.txt', delimiter=',')
    feature_sizes = [int(x) for x in feature_sizes]
    print(feature_sizes)

    model = DeepFM(feature_sizes)
    optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.0)
    model.train_model(loader_train,
                      loader_val,
                      optimizer,
                      epochs=2,
                      verbose=True)


if __name__ == '__main__':
    main()
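
The training loop itself is hidden inside the project's DeepFM.train_model method. The sketch below only illustrates what such a loop typically does, under the assumption that each batch is an (xi, xv, y) triple of index/value/label tensors and that model(xi, xv) returns raw logits; it is not the project's actual implementation.

import torch
import torch.nn.functional as F

def train_model_sketch(model, loader_train, loader_val, optimizer, epochs=2, verbose=True):
    # Hypothetical stand-in for DeepFM.train_model(); batch layout and loss are assumptions.
    for epoch in range(epochs):
        model.train()
        for xi, xv, y in loader_train:
            logits = model(xi.long(), xv.float())
            loss = F.binary_cross_entropy_with_logits(logits, y.float())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        if verbose:
            model.eval()
            with torch.no_grad():
                val_loss = sum(
                    F.binary_cross_entropy_with_logits(
                        model(xi.long(), xv.float()), y.float()).item()
                    for xi, xv, y in loader_val) / max(len(loader_val), 1)
            print(f'epoch {epoch}: mean val loss {val_loss:.4f}')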
Example #2
File: main.py  Project: 900groove/deepfm
import pickle

import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from utils import CrickDataset
from model import DeepFM

# Load a 10 000-row sample; the first column is the click label,
# the remaining columns are the features.
train = pd.read_csv('./data/processed/train.csv', nrows=10000)
target = train.iloc[:, 0]
train = train.iloc[:, 1:]
with open('./data/processed/feature_size.pkl', 'rb') as f:
    feature_size = pickle.load(f)

dataset = CrickDataset(train.values, target.values)
dataloader = DataLoader(dataset, batch_size=2048, shuffle=True)

model = DeepFM(feature_size).to('cuda')
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.001)
criterion = nn.BCEWithLogitsLoss()

# Single training epoch over (feature index, feature value, label) batches.
loss_score = []
for _ in range(1):
    for t, (xi, xv, y) in enumerate(dataloader):
        xi = xi.to(device='cuda', dtype=torch.long)
        xv = xv.to(device='cuda', dtype=torch.float)
        y = y.to(device='cuda', dtype=torch.float)

        total = model(xi, xv)   # raw logits; BCEWithLogitsLoss applies the sigmoid internally
        loss = criterion(total, y)
        loss_score.append(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
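
The loop above only records the training loss. Below is a hedged follow-up sketch of scoring the trained model on a held-out split; the val_xi, val_xv, val_y tensors and the use of scikit-learn's roc_auc_score are assumptions, not part of the original script.

import torch
from sklearn.metrics import roc_auc_score

# Hypothetical evaluation step: the model outputs logits, so apply a sigmoid
# before computing click probabilities and AUC on held-out data.
model.eval()
with torch.no_grad():
    val_logits = model(val_xi.to('cuda', dtype=torch.long),
                       val_xv.to('cuda', dtype=torch.float))
    val_prob = torch.sigmoid(val_logits).cpu().numpy()
print('validation AUC:', roc_auc_score(val_y, val_prob))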
Example #3
    valloader = DataLoader(dataset=valset,
                           batch_size=args.batch_size,
                           shuffle=False,
                           num_workers=args.num_workers,
                           drop_last=False)

    print('[init model]')
    model = DeepFM(num_fields=args.num_fields,
                   num_features=args.num_features,
                   embedding_dim=args.embedding_dim,
                   out_features=args.out_features,
                   hidden_units=args.hidden_units,
                   dropout_rates=args.dropout_rates).to(args.device)

    print('[init optimizer]')
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    print('[init criterion]')
    criterion = nn.BCEWithLogitsLoss()

    print('[init engines]')
    trainer = Trainer(model=model,
                      optimizer=optimizer,
                      criterion=criterion,
                      device=args.device)
    evaluator = Evaluator(model=model, criterion=criterion, device=args.device)

    print('[set handlers]')
    set_handlers(trainer=trainer,
                 evaluator=evaluator,
                 valloader=valloader,
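
This excerpt reads everything from an args namespace built outside the snippet. As a rough illustration, a possible argparse setup covering the fields used above is sketched below; the flag names mirror the attributes the snippet reads, but the default values are placeholders, not values from the original project.

import argparse

# Hypothetical argparse setup; defaults are placeholders only.
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', type=int, default=2048)
parser.add_argument('--num_workers', type=int, default=2)
parser.add_argument('--num_fields', type=int, default=39)
parser.add_argument('--num_features', type=int, default=100000)
parser.add_argument('--embedding_dim', type=int, default=16)
parser.add_argument('--out_features', type=int, default=1)
parser.add_argument('--hidden_units', type=int, nargs='+', default=[256, 128, 64])
parser.add_argument('--dropout_rates', type=float, nargs='+', default=[0.5, 0.5, 0.5])
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--device', type=str, default='cuda')
args = parser.parse_args()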
Example #4
import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# Project-specific modules; the import paths below are assumptions and may differ.
from dataset import custom_dataset
from model import DeepFM

# train_cat, train_dense, train_y, test_cat, test_dense and the hyper-parameters
# used below come from earlier, omitted parts of the script.
train_dataset = custom_dataset(train_cat, train_dense, train_y, if_y=True)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)

test_dataset = custom_dataset(test_cat, test_dense, if_y=False)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False, num_workers=2)

model = DeepFM(
    cat_fields=cat_fields,
    num_contns=num_contns,
    k=k,
    hidden_dims=hidden_dims,
    dropout=p,
    n_class=n_class,
    sparse=sparse).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(pos_weight, device=device))
print('model created.')
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=0.3, verbose=True)

'''training phase'''
for epoch in range(n_epoch):

    model.train()
    train_loss = 0
    train_score = 0
    val_score = 0
    train_preds, train_gts = [], []
    val_preds, val_gts = [], []

    '''train'''