def __init__(
    self,
    paths_or_dataset,
    batch_size,
    label_names,
    feature_columns=None,
    cat_names=None,
    cont_names=None,
    engine=None,
    shuffle=True,
    seed_fn=None,
    buffer_size=0.1,
    device=None,
    parts_per_chunk=1,
    reader_kwargs=None,
    global_size=None,
    global_rank=None,
    drop_last=False,
    sparse_names=None,
    sparse_max=None,
    sparse_as_dense=False,
):
    """Validate the inputs and initialise the underlying ``DataLoader``.

    ``paths_or_dataset``, ``batch_size``, ``buffer_size``, ``engine`` and
    ``reader_kwargs`` are handed to ``_validate_dataset`` to obtain the
    dataset. ``feature_columns``, ``cat_names`` and ``cont_names`` are
    resolved by ``_validate_schema`` and the resulting column lists are
    passed through ``_get_embedding_order``. A falsy ``device`` is replaced
    by ``0``. Every remaining argument is forwarded unchanged to
    ``DataLoader.__init__``.

    After the base initialiser returns, ``self._map_fns`` is set to an
    empty list.
    """
    dataset = _validate_dataset(
        paths_or_dataset, batch_size, buffer_size, engine, reader_kwargs
    )
    validated_cats, validated_conts = _validate_schema(
        feature_columns, cat_names, cont_names
    )

    # Sort the columns to avoid getting incorrect output
    # (https://github.com/NVIDIA/NVTabular/issues/412).
    cat_names = _get_embedding_order(validated_cats)
    cont_names = _get_embedding_order(validated_conts)

    # Fall back to device 0 when no (or a falsy) device was supplied.
    if not device:
        device = 0

    loader_options = {
        "seed_fn": seed_fn,
        "parts_per_chunk": parts_per_chunk,
        "device": device,
        "global_size": global_size,
        "global_rank": global_rank,
        "drop_last": drop_last,
        "sparse_names": sparse_names,
        "sparse_max": sparse_max,
        "sparse_as_dense": sparse_as_dense,
    }
    DataLoader.__init__(
        self,
        dataset,
        cat_names,
        cont_names,
        label_names,
        batch_size,
        shuffle,
        **loader_options,
    )
    self._map_fns = []
def __init__(
    self,
    paths_or_dataset,
    batch_size,
    label_names=None,
    feature_columns=None,
    cat_names=None,
    cont_names=None,
    engine=None,
    shuffle=True,
    seed_fn=None,
    buffer_size=0.1,
    device=None,
    parts_per_chunk=1,
    reader_kwargs=None,
    global_size=None,
    global_rank=None,
    drop_last=False,
    sparse_names=None,
    sparse_max=None,
    sparse_as_dense=False,
    schema=None,
):
    """Validate the inputs and initialise the underlying ``DataLoader``.

    The dataset is obtained from ``_validate_dataset``. When ``schema`` is
    falsy it is looked up with ``_get_schema(dataset)``; the schema is then
    given to ``_validate_schema`` together with ``feature_columns``,
    ``cat_names`` and ``cont_names`` to resolve the column lists.

    A falsy ``device`` is replaced by ``0``, and the device is forced to
    ``"cpu"`` whenever ``HAS_GPU`` is false. Every remaining argument is
    forwarded unchanged to ``DataLoader.__init__``.

    After the base initialiser returns, ``self._map_fns`` is set to an
    empty list.
    """
    dataset = _validate_dataset(
        paths_or_dataset, batch_size, buffer_size, engine, reader_kwargs
    )

    # Only consult the dataset for a schema when the caller gave none.
    if not schema:
        schema = _get_schema(dataset)
    cat_names, cont_names = _validate_schema(
        feature_columns, cat_names, cont_names, schema=schema
    )

    # Default to device 0, then override with the CPU when no GPU is present.
    if not device:
        device = 0
    if not HAS_GPU:
        device = "cpu"

    loader_options = {
        "cat_names": cat_names,
        "cont_names": cont_names,
        "label_names": label_names,
        "seed_fn": seed_fn,
        "parts_per_chunk": parts_per_chunk,
        "device": device,
        "global_size": global_size,
        "global_rank": global_rank,
        "drop_last": drop_last,
        "sparse_names": sparse_names,
        "sparse_max": sparse_max,
        "sparse_as_dense": sparse_as_dense,
    }
    DataLoader.__init__(self, dataset, batch_size, shuffle, **loader_options)
    self._map_fns = []
def __init__(
    self,
    paths_or_dataset,
    batch_size,
    label_names,
    feature_columns=None,
    cat_names=None,
    cont_names=None,
    engine=None,
    shuffle=True,
    buffer_size=0.1,
    workflows=None,
    devices=None,
    parts_per_chunk=1,
    reader_kwargs=None,
):
    """Validate the inputs and initialise the underlying ``DataLoader``.

    ``paths_or_dataset``, ``batch_size``, ``buffer_size``, ``engine`` and
    ``reader_kwargs`` are handed to ``_validate_dataset`` to obtain the
    dataset. ``feature_columns``, ``cat_names`` and ``cont_names`` are
    resolved by ``_validate_schema`` and the resulting column lists are
    passed through ``_get_embedding_order``.

    ``devices`` must be ``None`` or a sequence of exactly one entry; a falsy
    value is replaced by ``[0]``. ``label_names``, ``shuffle``,
    ``parts_per_chunk`` and ``workflows`` are forwarded unchanged to
    ``DataLoader.__init__``.

    Raises:
        AssertionError: if ``devices`` is given and does not contain exactly
            one entry.
    """
    dataset = _validate_dataset(
        paths_or_dataset, batch_size, buffer_size, engine, reader_kwargs
    )
    cat_names, cont_names = _validate_schema(feature_columns, cat_names, cont_names)

    # Sort the columns to avoid getting incorrect output
    # (https://github.com/NVIDIA/NVTabular/issues/412).
    cat_names = _get_embedding_order(cat_names)
    cont_names = _get_embedding_order(cont_names)

    # TODO: figure out multi-gpu support
    # Explicit raise rather than a bare `assert`, so the single-device
    # restriction is still enforced when Python runs with -O (which strips
    # assert statements). AssertionError is kept so existing callers that
    # catch it keep working.
    if devices is not None and len(devices) != 1:
        raise AssertionError(
            "exactly one device is supported; multi-GPU is not implemented yet"
        )
    devices = devices or [0]

    DataLoader.__init__(
        self,
        dataset,
        cat_names,
        cont_names,
        label_names,
        batch_size,
        shuffle,
        parts_per_chunk=parts_per_chunk,
        workflows=workflows,
        devices=devices,
    )