Пример #1
0
seed = 20170705
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
train_file = "train_large.txt"
feature_sizes_file = "feature_sizes_large.txt"
debug = False
#train_file = "train.txt"
#feature_sizes_file = "feature_sizes.txt"
#debug = True

# load data
train_data = CriteoDataset('./data', train=True, train_file=train_file)

# split trani and valid set
train_idx, valid_idx = split_train_and_valid(train_data, debug)

# loader
loader_train = DataLoader(train_data, batch_size=256, sampler=sampler.SubsetRandomSampler(train_idx), num_workers=0)
loader_val = DataLoader(train_data, batch_size=1000, sampler=sampler.SubsetRandomSampler(valid_idx), num_workers=0)

feature_sizes = np.loadtxt('./data/{}'.format(feature_sizes_file), delimiter=',')
feature_sizes = [int(x) for x in feature_sizes]
print(feature_sizes)

model = DeepFM(feature_sizes, use_cuda=True, overfitting=debug)
#optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.0)
optimizer = radam.RAdam(model.parameters(), lr=1e-3, weight_decay=0.0)
model.fit(loader_train, loader_val, optimizer, epochs=1000, verbose=True, print_every=1000, checkpoint_dir="./chkp")
Пример #2
0
import torch
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import sampler

from model.DeepFM import DeepFM
from data.dataset import CriteoDataset

# 900000 items for training, 10000 items for valid, of all 1000000 items
Num_train = 900000

# load data
train_data = CriteoDataset('./data', train=True)
loader_train = DataLoader(train_data,
                          batch_size=100,
                          sampler=sampler.SubsetRandomSampler(
                              range(Num_train)))
val_data = CriteoDataset('./data', train=True)
loader_val = DataLoader(val_data,
                        batch_size=100,
                        sampler=sampler.SubsetRandomSampler(
                            range(Num_train, 1000000)))

feature_sizes = np.loadtxt('./data/feature_sizes.txt', delimiter=',')
feature_sizes = [int(x) for x in feature_sizes]
print(feature_sizes)

model = DeepFM(feature_sizes, use_cuda=False)
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.0)
model.fit(loader_train, loader_val, optimizer, epochs=5, verbose=True)