def __init__(self, samples, sa=None, fa=None, a=None):
    """Build a dataset from samples and optional attribute collections.

    Only the samples are mandatory; attributes for samples, features,
    and the dataset as a whole may be supplied in any number, or
    omitted entirely.

    Parameters
    ----------
    samples : ndarray
      The data, laid out as a two-dimensional (samples x features)
      array. A plain list is converted to an array, and
      one-dimensional input is interpreted as many samples with a
      single feature each. For data in other layouts, consider one of
      the `AttrDataset.from_*` classmethods.
    sa : SampleAttributesCollection
      Samples attributes collection, merged into a freshly created
      collection when given.
    fa : FeatureAttributesCollection
      Features attributes collection, merged into a freshly created
      collection when given.
    a : DatasetAttributesCollection
      Dataset attributes collection, merged into a freshly created
      collection when given.

    Raises
    ------
    ValueError
      If `samples` has no `dtype` or no `shape` attribute, or if its
      shape has no axes at all.
    """
    # plain lists are turned into arrays up front
    if isinstance(samples, list):
        samples = np.array(samples)

    # `samples` has to look like an array: `dtype` is checked first,
    # then `shape`
    for required in ('dtype', 'shape'):
        if hasattr(samples, required):
            continue
        raise ValueError(
            "AttrDataset only supports dtypes as samples that have a "
            "`%s` attribute that behaves similar to the one of an "
            "array-like." % required)

    n_axes = len(samples.shape)
    if n_axes == 0:
        raise ValueError("Only `samples` with at least one axis are "
                         "supported (got: %i)" % n_axes)

    # 1D input becomes a single column: one feature, many samples
    if n_axes == 1:
        samples = np.atleast_2d(samples).T

    # validation passed -- store the samples
    self.samples = samples

    # Apart from the samples, everything a dataset carries lives in
    # collections. Each collection is attached first and only then
    # filled from the corresponding argument.
    # (sample count is taken via len(self); original note: number of
    # samples is .shape[0] for sparse matrix support)
    self.sa = SampleAttributesCollection(length=len(self))
    if sa is not None:
        self.sa.update(sa)

    self.fa = FeatureAttributesCollection(length=self.nfeatures)
    if fa is not None:
        self.fa.update(fa)

    self.a = DatasetAttributesCollection()
    if a is not None:
        self.a.update(a)