def get_loaders(dataset_name, dataset_dir, batch_size):
    """Build train/validation/test loaders for a named dataset.

    The dataset is divided with ``random_split`` into 80% training, 10%
    validation and the remaining samples for testing. Each part is wrapped
    in a ``DataLoader`` created with ``num_workers=8``.

    Args:
        dataset_name: ``'movielens1m'`` or ``'criteo'``.
        dataset_dir: Directory that contains the ``ml-1m`` / ``criteo``
            sub-directories with the raw data files.
        batch_size: Batch size handed to every loader.

    Returns:
        A pair ``(field_dims, (train_loader, validation_loader,
        test_loader))`` where ``field_dims`` is read from the dataset's
        ``field_dims`` attribute.

    Raises:
        ValueError: If ``dataset_name`` is not a supported name.
    """
    # Reject unsupported names up front so nothing is loaded for them.
    if dataset_name not in ('movielens1m', 'criteo'):
        raise ValueError(f"Unknown dataset {dataset_name}!")

    if dataset_name == 'movielens1m':
        source_file = path.join(dataset_dir, 'ml-1m', 'ratings.dat')
        full_data = MovieLens1MDataset(source_file)
    else:
        source_file = path.join(dataset_dir, 'criteo', 'train.txt')
        criteo_cache = str(Path.home()) + '/.criteo'
        full_data = CriteoDataset(source_file, cache_path=criteo_cache)

    # The test part absorbs whatever the two truncated sizes leave over,
    # so the three sizes always sum to the dataset length.
    total = len(full_data)
    n_train = int(total * 0.8)
    n_validation = int(total * 0.1)
    n_test = total - n_train - n_validation

    subsets = random_split(full_data, (n_train, n_validation, n_test))
    loaders = tuple(
        DataLoader(subset, batch_size=batch_size, num_workers=8)
        for subset in subsets
    )
    return full_data.field_dims, loaders
def get_dataset(name, path):
    """Construct the dataset selected by ``name``.

    Args:
        name: One of ``'movielens1M'``, ``'movielens20M'``, ``'criteo'`` or
            ``'avazu'``.
        path: Passed as the single argument to the dataset constructor.

    Returns:
        The newly constructed dataset object.

    Raises:
        ValueError: If ``name`` is not one of the supported names.
    """
    if name == 'movielens1M':
        return MovieLens1MDataset(path)
    if name == 'movielens20M':
        return MovieLens20MDataset(path)
    if name == 'criteo':
        return CriteoDataset(path)
    if name == 'avazu':
        return AvazuDataset(path)
    # Format instead of concatenating so a non-string name (e.g. None) is
    # still reported as ValueError rather than failing with TypeError.
    raise ValueError(f'unknown dataset name: {name}')
def get_dataset(name, path):
    """Construct the dataset selected by ``name``.

    Args:
        name: One of ``'movielens1M'``, ``'movielens20M'``, ``'flow'``,
            ``'criteo'`` or ``'avazu'``.
        path: Passed as the first argument to the dataset constructor.

    Returns:
        The newly constructed dataset object. The Criteo dataset is built
        with ``cache_path='.criteo'`` and ``predict=False``.

    Raises:
        ValueError: If ``name`` is not one of the supported names.
    """
    if name == 'movielens1M':
        return MovieLens1MDataset(path)
    if name == 'movielens20M':
        return MovieLens20MDataset(path)
    if name == 'flow':
        return FlowDataset(path)
    if name == 'criteo':
        return CriteoDataset(path, cache_path='.criteo', predict=False)
    if name == 'avazu':
        return AvazuDataset(path)
    # Format instead of concatenating so a non-string name (e.g. None) is
    # still reported as ValueError rather than failing with TypeError.
    raise ValueError(f'unknown dataset name: {name}')
def get_dataset(name, path):
    """Ensure the parent directory of ``path`` exists, then build a dataset.

    Directory creation is best effort: a failure to create it is ignored
    here and left for the dataset constructor to surface. The directory is
    created before ``name`` is validated.

    Args:
        name: One of ``'movielens1M'``, ``'movielens20M'``, ``'criteo'`` or
            ``'avazu'``.
        path: Passed as the single argument to the dataset constructor; its
            directory component is created if missing.

    Returns:
        The newly constructed dataset object.

    Raises:
        ValueError: If ``name`` is not one of the supported names.
    """
    parent_dir = os.path.dirname(path)
    # dirname is empty for a bare file name; there is nothing to create.
    if parent_dir:
        try:
            # exist_ok avoids the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(parent_dir, exist_ok=True)
        except (OSError, ValueError):
            # Best effort only. Catching these two instead of using a bare
            # except keeps KeyboardInterrupt/SystemExit propagating.
            pass

    if name == 'movielens1M':
        return MovieLens1MDataset(path)
    if name == 'movielens20M':
        return MovieLens20MDataset(path)
    if name == 'criteo':
        return CriteoDataset(path)
    if name == 'avazu':
        return AvazuDataset(path)
    # Format instead of concatenating so a non-string name (e.g. None) is
    # still reported as ValueError rather than failing with TypeError.
    raise ValueError(f'unknown dataset name: {name}')